Packages to be installed
The following packages are needed for this project:
#including required packages
# Load every package once, suppressing startup warnings/messages so the
# knitted document stays clean.
# NOTE(review): the original list loaded tidyr, ggplot2 and magrittr twice;
# the duplicates have been removed. tidyverse already attaches several of
# these (ggplot2, dplyr, tidyr, purrr, stringr), but they are kept so the
# dependency list stays explicit.
required_pkgs <- c(
  "tidyverse", "tidyr", "fpp3", "stargazer", "tsibble", "latex2exp",
  "magrittr", "stringr", "seasonal", "ggplot2", "ggfortify", "dplyr",
  "stats", "factoextra", "knitr", "kableExtra", "purrr", "gridExtra",
  "scales"
)
invisible(lapply(
  required_pkgs,
  function(pkg) suppressWarnings(suppressMessages(library(pkg, character.only = TRUE)))
))
Loading Tables
All the datasets used in this project are loaded here.
2.1.Data Source
The data is downloaded from https://fred.stlouisfed.org/
#reading data from excel
# All series below are FRED downloads (https://fred.stlouisfed.org/).
# NOTE(review): absolute Windows paths make this script non-portable;
# consider relative paths or here::here(). Kept as-is to preserve behavior.
library(readxl)
#real personal consumption expenditure: durable goods
data_rpce_durable_goods <- read_excel("D:\\Sem 2 - Spring 2022\\ECON 825\\Project\\Consumer goods Data\\RPCE\\ND000346Q (1).xls" )
#industrial production: durable goods
data_industrial_prod_durable_goods <- read_excel("D:\\Sem 2 - Spring 2022\\ECON 825\\Project\\Consumer goods Data\\IP\\IPB51100N.xls")
#Producer Price index by commodity : durable goods
data_PPI_durable_goods <- read_excel("D:\\Sem 2 - Spring 2022\\ECON 825\\Project\\Consumer goods Data\\PPI\\WPSFD413112.xls")
#Consumer Price index by commodity : durable goods
data_CPI_durable_goods <- read_excel("D:\\Sem 2 - Spring 2022\\ECON 825\\Project\\Consumer goods Data\\CPI\\CUSR0000SAD.xls")
#GDP
data_GDP <- read_excel("D:\\Sem 2 - Spring 2022\\ECON 825\\Project\\GDP\\Quaterly-non seasonal adjusted\\ND000334Q.xls")
# Non-Durable
#real personal consumption expenditure: non durable goods
data_rpce_ndurable_goods <- read_excel("D:\\Sem 2 - Spring 2022\\ECON 825\\Project\\Consumer goods Data\\Non-Durable\\RPCE\\ND000348Q.xls" )
#industrial production: non durable goods
data_industrial_prod_ndurable_goods <- read_excel("D:\\Sem 2 - Spring 2022\\ECON 825\\Project\\Consumer goods Data\\Non-Durable\\IP\\IPB51200N.xls")
#Producer Price index by commodity : non durable goods
data_PPI_ndurable_goods <- read_excel("D:\\Sem 2 - Spring 2022\\ECON 825\\Project\\Consumer goods Data\\Non-Durable\\PPI\\WPSFD49508.xls")
#Consumer Price index by commodity : non durable goods
data_CPI_ndurable_goods <- read_excel("D:\\Sem 2 - Spring 2022\\ECON 825\\Project\\Consumer goods Data\\Non-Durable\\CPI\\CUSR0000SAN.xls")
#Services
#real personal consumption
data_rpce_services<-read_excel("D:\\Sem 2 - Spring 2022\\ECON 825\\Project\\Consumer goods Data\\Services\\ND000350Q.xls")
#all employees services providing
# CSV source: parse DATE into a yearmonth key and convert to a monthly tsibble.
data_ae_sp<-read.csv("D:\\Sem 2 - Spring 2022\\ECON 825\\Project\\Consumer goods Data\\Services\\SRVPRD.csv", header = TRUE) %>%
mutate(Month = yearmonth(DATE)) %>%as_tsibble(index=Month)
#net export
data_netexp<-read_excel("D:\\Sem 2 - Spring 2022\\ECON 825\\Project\\Consumer goods Data\\Services\\ND000374Q.xls")
#Real disposable income
# Quarterly series: build a yearquarter index directly at load time.
data_rdpi<-read_excel("D:\\Sem 2 - Spring 2022\\ECON 825\\Project\\Real disposable income\\Quaterly-seasonal adjusted\\DPIC96.xls") %>%
mutate(Quarter = yearquarter(observation_date)) %>%as_tsibble(index=Quarter)
2.2.Modifying Data as per requirements
#converting personal consumption expenditure to a % of expenditure in GDP
data_rpce_durable_goods_gdp <- inner_join(data_rpce_durable_goods,data_GDP, by = "observation_date")
rpce_durable_goods_gdp <- data_rpce_durable_goods_gdp %>% mutate(Quarter = yearquarter(observation_date), Expenditure = (ND000346Q*100/ND000334Q))%>%
select(Quarter,Expenditure) %>% as_tsibble(index = Quarter)
# NOTE(review): the additive offsets applied below (61.6696 for IP, 3.7 for CPI)
# look like level/rebasing adjustments, but their source is not documented here
# — confirm where these constants come from.
industrial_prod_durable_goods <- data_industrial_prod_durable_goods %>% mutate(Month = yearmonth(observation_date), Index = IPB51100N)%>% mutate(Index = Index + 61.6696) %>%
select(Month,Index) %>% as_tsibble(index = Month)
PPI_durable_goods <- data_PPI_durable_goods %>% mutate(Month = yearmonth(observation_date), Index = WPSFD413112)%>%
select(Month,Index) %>% as_tsibble(index = Month)
CPI_durable_goods <- data_CPI_durable_goods %>% mutate(Month = yearmonth(observation_date), Index = CUSR0000SAD) %>% mutate(Index = Index + 3.7)%>%
select(Month,Index) %>% as_tsibble(index = Month)
GDP <- data_GDP %>% mutate(Quarter = yearquarter(observation_date), GDP = ND000334Q)%>%
select(Quarter,GDP) %>% as_tsibble(index = Quarter)
# Preview the constructed tsibbles (output below is the recorded knit result).
head(rpce_durable_goods_gdp)
## # A tsibble: 6 x 2 [1Q]
## Quarter Expenditure
## <qtr> <dbl>
## 1 2002 Q1 5.72
## 2 2002 Q2 6.00
## 3 2002 Q3 6.13
## 4 2002 Q4 6.46
## 5 2003 Q1 5.68
## 6 2003 Q2 6.32
head(industrial_prod_durable_goods)
## # A tsibble: 6 x 2 [1M]
## Month Index
## <mth> <dbl>
## 1 1947 Jan 73.2
## 2 1947 Feb 73.9
## 3 1947 Mar 74.3
## 4 1947 Apr 74.3
## 5 1947 May 73.9
## 6 1947 Jun 74.2
head(PPI_durable_goods)
## # A tsibble: 6 x 2 [1M]
## Month Index
## <mth> <dbl>
## 1 1947 Apr 32.5
## 2 1947 May 32.6
## 3 1947 Jun 32.7
## 4 1947 Jul 32.8
## 5 1947 Aug 33.2
## 6 1947 Sep 33.4
head(CPI_durable_goods)
## # A tsibble: 6 x 2 [1M]
## Month Index
## <mth> <dbl>
## 1 1956 Jan 39.4
## 2 1956 Feb 39.5
## 3 1956 Mar 39.5
## 4 1956 Apr 39.4
## 5 1956 May 39.5
## 6 1956 Jun 39.6
head(data_rdpi)
## # A tsibble: 6 x 3 [1Q]
## observation_date DPIC96 Quarter
## <dttm> <dbl> <qtr>
## 1 1947-01-01 00:00:00 1395. 1947 Q1
## 2 1947-04-01 00:00:00 1382. 1947 Q2
## 3 1947-07-01 00:00:00 1418. 1947 Q3
## 4 1947-10-01 00:00:00 1399. 1947 Q4
## 5 1948-01-01 00:00:00 1425. 1948 Q1
## 6 1948-04-01 00:00:00 1469. 1948 Q2
#non durables
#converting personal consumption expenditure to a % of expenditure in GDP
data_rpce_ndurable_goods_gdp <- inner_join(data_rpce_ndurable_goods, data_GDP, by = "observation_date")
rpce_ndurable_goods_gdp <- data_rpce_ndurable_goods_gdp %>%
  mutate(Quarter = yearquarter(observation_date),
         Expenditure = (ND000348Q * 100 / ND000334Q)) %>%
  select(Quarter, Expenditure) %>%
  as_tsibble(index = Quarter)
# Industrial production index, non-durables.
# NOTE(review): the additive offset (24.0739) looks like a rebasing adjustment
# — confirm its source.
industrial_prod_ndurable_goods <- data_industrial_prod_ndurable_goods %>%
  mutate(Month = yearmonth(observation_date), Index = IPB51200N) %>%
  mutate(Index = Index + 24.0739) %>%
  select(Month, Index) %>%
  as_tsibble(index = Month)
# Producer price index, non-durables (no offset applied).
PPI_ndurable_goods <- data_PPI_ndurable_goods %>%
  mutate(Month = yearmonth(observation_date), Index = WPSFD49508) %>%
  select(Month, Index) %>%
  as_tsibble(index = Month)
# Consumer price index, non-durables.
# NOTE(review): offset (1.4) — confirm its source.
CPI_ndurable_goods <- data_CPI_ndurable_goods %>%
  mutate(Month = yearmonth(observation_date), Index = CUSR0000SAN) %>%
  mutate(Index = Index + 1.4) %>%
  select(Month, Index) %>%
  as_tsibble(index = Month)
# The quarterly GDP tsibble is already constructed (identically) in the
# durable-goods section above; the redundant recomputation has been removed.
# Preview the non-durable tsibbles (output below is the recorded knit result).
head(rpce_ndurable_goods_gdp)
## # A tsibble: 6 x 2 [1Q]
## Quarter Expenditure
## <qtr> <dbl>
## 1 2002 Q1 15.4
## 2 2002 Q2 15.7
## 3 2002 Q3 15.6
## 4 2002 Q4 17.3
## 5 2003 Q1 15.4
## 6 2003 Q2 15.9
head(industrial_prod_ndurable_goods)
## # A tsibble: 6 x 2 [1M]
## Month Index
## <mth> <dbl>
## 1 1947 Jan 47.7
## 2 1947 Feb 47.7
## 3 1947 Mar 47.4
## 4 1947 Apr 46.4
## 5 1947 May 46.4
## 6 1947 Jun 47.1
head(PPI_ndurable_goods)
## # A tsibble: 6 x 2 [1M]
## Month Index
## <mth> <dbl>
## 1 1947 Apr 24.1
## 2 1947 May 24.2
## 3 1947 Jun 24.3
## 4 1947 Jul 24.2
## 5 1947 Aug 24.3
## 6 1947 Sep 24.4
head(CPI_ndurable_goods)
## # A tsibble: 6 x 2 [1M]
## Month Index
## <mth> <dbl>
## 1 1956 Jan 30.9
## 2 1956 Feb 30.8
## 3 1956 Mar 30.9
## 4 1956 Apr 31
## 5 1956 May 31.2
## 6 1956 Jun 31.4
#Services
# Real personal consumption expenditure on services, expressed as a % of GDP,
# keyed by quarter.
data_rpce_services_gdp <- data_rpce_services %>%
  inner_join(data_GDP, by = "observation_date") %>%
  mutate(Quarter = yearquarter(observation_date),
         Expenditure = (ND000350Q * 100 / ND000334Q)) %>%
  select(Quarter, Expenditure) %>%
  as_tsibble(index = Quarter)
# Net exports as a % of GDP, keyed by quarter.
data_netexp_gdp <- data_netexp %>%
  inner_join(data_GDP, by = "observation_date") %>%
  mutate(Quarter = yearquarter(observation_date),
         Expenditure = (ND000374Q * 100 / ND000334Q)) %>%
  select(Quarter, Expenditure) %>%
  as_tsibble(index = Quarter)
# Preview the services-related tsibbles (output below is the recorded knit result).
head(data_rpce_services_gdp)
## # A tsibble: 6 x 2 [1Q]
## Quarter Expenditure
## <qtr> <dbl>
## 1 2002 Q1 46.9
## 2 2002 Q2 45.7
## 3 2002 Q3 45.5
## 4 2002 Q4 44.9
## 5 2003 Q1 46.9
## 6 2003 Q2 45.7
head(data_netexp_gdp)
## # A tsibble: 6 x 2 [1Q]
## Quarter Expenditure
## <qtr> <dbl>
## 1 2002 Q1 -4.53
## 2 2002 Q2 -5.00
## 3 2002 Q3 -5.51
## 4 2002 Q4 -5.41
## 5 2003 Q1 -5.13
## 6 2003 Q2 -5.59
head(data_ae_sp)
## # A tsibble: 6 x 3 [1M]
## DATE SRVPRD Month
## <chr> <int> <mth>
## 1 1939-01-01 18825 1939 Jan
## 2 1939-02-01 18879 1939 Feb
## 3 1939-03-01 18897 1939 Mar
## 4 1939-04-01 18912 1939 Apr
## 5 1939-05-01 19001 1939 May
## 6 1939-06-01 19055 1939 Jun
3.1.Real Personal Consumption Expenditure: Durable Goods
Looking at time series
# Visual inspection of RPCE (durable goods, % of GDP): level plot, seasonal
# plot and subseries plot, stacked vertically.
p_level <- autoplot(rpce_durable_goods_gdp, Expenditure) +
  ggtitle("Real personal consumption Expenditure: Durable Goods") +
  ylab("% of GDP")
p_season <- gg_season(rpce_durable_goods_gdp, Expenditure) +
  ggtitle("Real personal consumption Expenditure: Durable Goods") +
  ylab("% of GDP")
p_subseries <- gg_subseries(rpce_durable_goods_gdp, Expenditure) +
  ggtitle("Real personal consumption Expenditure: Durable Goods") +
  ylab("% of GDP")
grid.arrange(p_level, p_season, p_subseries, nrow = 3)
Observations for Real Personal Consumption Expenditure: Durable Goods
#Checking for different components in the series
# STL decomposition splits the series into trend, seasonal and remainder.
rpce_durable_goods_gdp %>%
model(
STL(Expenditure)) %>%
components() %>%
autoplot()+
labs(title = "STL decomposition of RPCE: Consumer goods")
Forecast: RPCE
We will build two competing models — ARIMA and ETS — on the training data and, for every forecasting exercise, select the best-performing model by comparing their accuracy on the test data set.
Using ARIMA Model
#transform the data and stabilize as there is variation in the series which is increasing with time
#Box-Cox transformation (lambda chosen via the Guerrero method)
lambda1 <- rpce_durable_goods_gdp %>%
features(Expenditure, features = guerrero) %>%
pull(lambda_guerrero)
rpce_durable_goods_gdp %>%
autoplot(box_cox(Expenditure, lambda1)) +
labs(y = "", title = TeX(paste0("Transformed Real personal consumption expenditure as % of GDP for Durable Goods with $\\lambda$ = ",
round(lambda1,2))))
#Checking for stationarity
rpce_durable_goods_gdp %>% ACF(Expenditure) %>% autoplot()+ggtitle("ACF plot for Real Personal Consumption Expenditure: Durable goods")
#Running KPSS to statistically justify that data is stationary or not and find order of the differencing - Null hypothesis (Ho): Data is stationary - Alternate hypothesis (Halt): Data is non-stationary
rpce_durable_goods_gdp %>% features(box_cox(Expenditure, lambda1), unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 1.87 0.01
# p-value 0.01 rejects stationarity, so differencing is required.
#no. of seasonal difference
rpce_durable_goods_gdp %>% mutate(t_Expenditure = box_cox(Expenditure,lambda1)) %>%
features(t_Expenditure, unitroot_nsdiffs)
## # A tibble: 1 x 1
## nsdiffs
## <int>
## 1 1
#no. of regular difference (after one seasonal difference at lag 4)
rpce_durable_goods_gdp %>% mutate(d_Expenditure = difference(box_cox(Expenditure,lambda1),4)) %>% features(d_Expenditure, unitroot_ndiffs)
## # A tibble: 1 x 1
## ndiffs
## <int>
## 1 0
rpce_durable_goods_gdp %>%
autoplot(box_cox(Expenditure,lambda1)%>% difference(lag=4))+
ggtitle("Real personal consumption Expenditure: Durable Goods(Differenced timeseries)")+ylab("% of GDP")
#KPSS on box-cox transformed seasonal differenced time series
rpce_durable_goods_gdp %>% mutate(t_Expenditure = difference(box_cox(Expenditure,lambda1),4)) %>%
features(t_Expenditure, unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.293 0.1
# p-value 0.1 fails to reject stationarity: the differenced series is stationary.
#checking residuals of differenced time series
rpce_durable_goods_gdp %>%
gg_tsdisplay(box_cox(Expenditure,lambda1) %>% difference(lag=4), plot_type="partial")+
ggtitle("Residuals of differenced RPCE: Durable Goods")
ACF plot
PACF plot
#test data set (2019 Q1 onwards)
rpce_test <- rpce_durable_goods_gdp %>%
filter_index("2019 Q1" ~.)
#training data set (everything up to 2018 Q4)
rpce_train <- rpce_durable_goods_gdp %>%
filter_index(~ "2018 Q4")
# Candidate ARIMA specifications plus an automatic exhaustive search.
arima_all_models1 <- rpce_train %>%
model(
arima003110 = ARIMA(box_cox(Expenditure,lambda1) ~ pdq(0,0,3) + PDQ(1,1,0)),
arima001110 = ARIMA(box_cox(Expenditure,lambda1) ~ pdq(0,0,1) + PDQ(1,1,0)),
arima100110 = ARIMA(box_cox(Expenditure,lambda1) ~ pdq(1,0,0) + PDQ(1,1,0)),
# NOTE(review): this key is mislabeled — the spec is pdq(0,0,2)+PDQ(1,1,0),
# not (1,0,0)(0,1,1); kept as-is because the recorded output references it.
arima100011 = ARIMA(box_cox(Expenditure,lambda1) ~ pdq(0,0,2) + PDQ(1,1,0)),
auto_arima = ARIMA(box_cox(Expenditure,lambda1), stepwise = FALSE, approx = FALSE)
)
# Rank candidates by AICc; the automatic search wins.
glance(arima_all_models1) %>% arrange(AICc)
## # A tibble: 4 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 auto_arima 0.0000118 273. -537. -536. -529. <cpl [1]> <cpl [4]>
## 2 arima100110 0.0000132 269. -531. -530. -522. <cpl [5]> <cpl [0]>
## 3 arima100011 0.0000180 260. -511. -509. -500. <cpl [4]> <cpl [2]>
## 4 arima001110 0.0000207 255. -503. -502. -494. <cpl [4]> <cpl [1]>
best_arima_model1 <- arima_all_models1 %>% select(auto_arima)
report(best_arima_model1)
## Series: Expenditure
## Model: ARIMA(1,0,0)(0,1,1)[4] w/ drift
## Transformation: box_cox(Expenditure, lambda1)
##
## Coefficients:
## ar1 sma1 constant
## 0.9127 -0.6607 3e-04
## s.e. 0.0507 0.1181 1e-04
##
## sigma^2 estimated as 1.179e-05: log likelihood=272.58
## AIC=-537.15 AICc=-536.48 BIC=-528.52
Best ARIMA model-RPCE: ARIMA(1,0,0)(0,1,1)[4] w/ drift
#looking at residuals
best_arima_model1 %>%
gg_tsresiduals()+ ggtitle("Residual analysis of RPCE Arima model")
#Doing Ljung box test on residuals
# NOTE(review): fitdf = 2 but the fitted model reports 3 estimated
# coefficients (ar1, sma1, constant) — confirm the intended degrees of freedom.
Box.test(augment(best_arima_model1)$.resid, lag=36, fitdf = 2, type = "Lj")
##
## Box-Ljung test
##
## data: augment(best_arima_model1)$.resid
## X-squared = 25.278, df = 34, p-value = 0.8603
Observations
Using ETS model
# Decomposition model: STL on the Box-Cox-transformed series, with an
# additive-trend ETS fitted to the seasonally adjusted component.
# Renamed from `stl` to avoid masking stats::stl().
stl_decomp_spec <- decomposition_model(
  STL(box_cox(Expenditure, lambda1)),
  ETS(season_adjust ~ error("A") + trend("A") + season("N"))
)
# Candidate ETS specifications plus an automatic selection.
ets_all_models1 <- rpce_train %>%
  model(
    AAdA = ETS(box_cox(Expenditure, lambda1) ~ error("A") + trend("Ad") + season("A")),
    AAA = ETS(box_cox(Expenditure, lambda1) ~ error("A") + trend("A") + season("A")),
    MAdM = ETS(box_cox(Expenditure, lambda1) ~ error("M") + trend("Ad") + season("M")),
    stl_ets = stl_decomp_spec,
    ETS_auto = ETS(box_cox(Expenditure, lambda1))
  )
# Training-set accuracy for each ETS candidate.
accuracy(ets_all_models1)
## # A tibble: 5 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAdA Training 0.0286 0.133 0.104 0.358 1.46 0.384 0.430 0.0252
## 2 AAA Training -0.000524 0.131 0.0966 0.00339 1.38 0.357 0.422 0.109
## 3 MAdM Training 0.0163 0.198 0.145 0.182 2.09 0.536 0.641 0.728
## 4 stl_ets Training 0.00465 0.107 0.0799 0.0390 1.14 0.295 0.347 0.109
## 5 ETS_auto Training -0.000524 0.131 0.0966 0.00339 1.38 0.357 0.422 0.109
# Plot 4-year-ahead forecasts from all candidates, then from stl_ets alone.
ets_all_models1 %>%
forecast(h = "4 years") %>%
autoplot(rpce_durable_goods_gdp) +
labs(y = "Expenditure(%of GDP)", title = "RPCE: Durable goods")
ets_all_models1 %>%select(stl_ets) %>%
forecast(h = "4 years") %>%
autoplot(rpce_durable_goods_gdp) +
labs(y = "Expenditure(%of GDP)", title = "RPCE: Durable goods")
Best ETS model-PRCE: Decomposition model with additive trend : stl_ets with lowest RMSE
# Keep the two best ETS candidates for out-of-sample comparison.
best_ets_model1_1 <- ets_all_models1 %>%
select(stl_ets)
best_ets_model1_2 <- ets_all_models1 %>%
select(AAA)
best_ets_model1_1 %>%
gg_tsresiduals(lag_max = 24)+ggtitle("Residual analysis of RPCE Durable: ETS model")
best_ets_model1_2 %>%
gg_tsresiduals(lag_max = 24)+ggtitle("Residual analysis of RPCE Durable: ETS model")
#Ljung-Box test for residuals (type = "Lj")
Box.test(augment(best_ets_model1_1)$.resid, lag=24, fitdf = 5, type = "Lj")
##
## Box-Ljung test
##
## data: augment(best_ets_model1_1)$.resid
## X-squared = 18.963, df = 19, p-value = 0.4592
#Ljung-Box test for residuals (type = "Lj")
Box.test(augment(best_ets_model1_2)$.resid, lag=24, fitdf = 6, type = "Lj")
##
## Box-Ljung test
##
## data: augment(best_ets_model1_2)$.resid
## X-squared = 16.665, df = 18, p-value = 0.5462
Observations
Plotting both forecast
# NOTE(review): forecasts below use h = "5 years" for plotting but
# h = "4 years" for the accuracy comparison — confirm this is intentional.
fc1 <- best_arima_model1 %>% forecast(h = "5 years")
fc2_1 <- best_ets_model1_1 %>% forecast(h = "5 years")
fc2_2 <- best_ets_model1_2 %>% forecast(h = "5 years")
r1<- fc1 %>%
autoplot(rpce_train) +
geom_line(data=rpce_test, aes(x=Quarter, y=Expenditure), col='red')+
ggtitle("RPCE: Durable Goods: Arima")+ylab("Expenditure as % of GDP")
r2<- fc2_1 %>%
autoplot(rpce_train) +
geom_line(data=rpce_test, aes(x=Quarter, y=Expenditure), col='red')+
ggtitle("RPCE: Durable Goods: ETS")+ylab("Expenditure as % of GDP")
r3<- fc2_2 %>%
autoplot(rpce_train) +
geom_line(data=rpce_test, aes(x=Quarter, y=Expenditure), col='red')+
ggtitle("RPCE: Durable Goods: ETS")+ylab("Expenditure as % of GDP")
grid.arrange(r1,r2,r3,nrow=3)
# Generate forecasts and compare accuracy over the test set
# NOTE(review): MASE/RMSSE are NaN on the test rows — presumably because
# accuracy() is given only the test set and cannot compute scaled errors;
# pass the full series to fix.
bind_rows(
best_arima_model1 %>% accuracy(),
best_ets_model1_1 %>% accuracy(),
best_ets_model1_2 %>% accuracy(),
best_arima_model1 %>% forecast(h = "4 years") %>% accuracy(rpce_test),
best_ets_model1_1 %>% forecast(h = "4 years") %>% accuracy(rpce_test),
best_ets_model1_2 %>% forecast(h = "4 years") %>% accuracy(rpce_test)
) %>%
select(-ME, -MPE, -ACF1)
## # A tibble: 6 x 7
## .model .type RMSE MAE MAPE MASE RMSSE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 auto_arima Training 0.126 0.0934 1.34 0.345 0.406
## 2 stl_ets Training 0.107 0.0799 1.14 0.295 0.347
## 3 AAA Training 0.131 0.0966 1.38 0.357 0.422
## 4 auto_arima Test 1.11 0.848 7.71 NaN NaN
## 5 stl_ets Test 1.18 0.907 8.25 NaN NaN
## 6 AAA Test 1.02 0.776 7.08 NaN NaN
Since the RMSE of ETS(A,A,A) is the lowest on the test data, ETS(A,A,A) is the better model. We will use it to forecast real personal consumption expenditure as a % of GDP one year ahead.
3.2.Industrial Production: Durable Goods
Looking at time series
#plotting graphs to see our time series
g1<- industrial_prod_durable_goods %>%
autoplot(Index) + ggtitle("Industrial Production: Durable Goods") + ylab("Index")
g2<- industrial_prod_durable_goods %>%
gg_season(Index) + ggtitle("Industrial Production: Durable Goods") + ylab("Index")
grid.arrange(g1, g2, nrow = 2)
Observations for Industrial Production: Durable Goods
#Checking for different components in the series
# STL decomposition splits the series into trend, seasonal and remainder.
industrial_prod_durable_goods %>%
model(
STL(Index)) %>%
components() %>%
autoplot()+
labs(title = "STL decomposition of Industrial Production: Consumer goods")
* So, there is a strong seasonality in the series which is increasing over time
Forecast: Industrial production
We will build two competing models — ARIMA and ETS — on the training data and, for every forecasting exercise, select the best-performing model by comparing their accuracy on the test data set.
Using Arima Model
#transform the data and stabilize as there is variation in the series which is increasing with time
#Box-Cox transformation (lambda chosen via the Guerrero method)
lambda2 <- industrial_prod_durable_goods %>%
features(Index, features = guerrero) %>%
pull(lambda_guerrero)
g1<- industrial_prod_durable_goods %>%
autoplot(box_cox(Index, lambda2)) +
labs(y = "", title = TeX(paste0("Transformed Industrial Production for Durable Goods with $\\lambda$ = ", round(lambda2,2))))
# Log transform compared against the Box-Cox transform.
g2<- industrial_prod_durable_goods %>%
autoplot(log(Index)) +
labs(y = "", title = "Log Transformed Industrial Production for Durable Goods")
#Step 2: Checking for stationarity
g3<- industrial_prod_durable_goods %>% ACF(Index) %>% autoplot()+ ggtitle("ACF plot for Industrial production: Durable goods")
grid.arrange(g1,g2,g3, nrow=3)
#Running KPSS to statistically justify that data is stationary or not and find order of the differencing - Null hypothesis (Ho): Data is stationary - Alternate hypothesis (Halt): Data is non-stationary
industrial_prod_durable_goods %>% features(log(Index), unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 12.6 0.01
# p-value 0.01 rejects stationarity, so differencing is required.
#no. of seasonal difference - industrial production
industrial_prod_durable_goods %>% mutate(t_Index = log(Index)) %>%
features(t_Index, unitroot_nsdiffs)
## # A tibble: 1 x 1
## nsdiffs
## <int>
## 1 1
#no. of regular difference - industrial production (after one seasonal difference at lag 12)
industrial_prod_durable_goods %>% mutate(d_Index = difference(log(Index),12)) %>% features(d_Index, unitroot_ndiffs)
## # A tibble: 1 x 1
## ndiffs
## <int>
## 1 0
industrial_prod_durable_goods %>%
autoplot(log(Index)%>% difference(lag=12))+
ggtitle("Industrial Production: Durable Goods(Differenced timeseries)")+ylab("Index")
#KPSS on log transformed seasonally differenced time series
industrial_prod_durable_goods %>% mutate(d_Index = difference(log(Index),12)) %>% features(d_Index, unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.0938 0.1
# p-value 0.1 fails to reject stationarity: the differenced series is stationary.
#checking residuals of differenced time series
industrial_prod_durable_goods %>%
gg_tsdisplay(log(Index) %>% difference(lag=12), plot_type="partial")+
ggtitle("Residuals of differenced Industrial Production: Durable Goods")
ACF
PACF
We can see a negative spike at lag 12. This is a seasonal spike, and the seasonal spikes appear to decay exponentially over time, which suggests an AR(1) seasonal component.
A significant partial autocorrelation is seen at lag 1. This non-seasonal spike suggests an AR(1) non-seasonal component.
Models suggested:
#test data set (2019 Jan onwards)
ip_test <- industrial_prod_durable_goods %>%
filter_index("2019 Jan" ~.)
#training data (everything up to 2018 Dec)
ip_train <- industrial_prod_durable_goods %>%
filter_index(~ "2018 Dec")
# Candidate ARIMA specifications plus an automatic exhaustive search.
arima_all_models2 <- ip_train %>%
model(
arima100110 = ARIMA(log(Index) ~ pdq(1,0,0) + PDQ(1,1,0)),
arima001011 = ARIMA(log(Index) ~ pdq(0,0,1) + PDQ(0,1,1)),
arima100011 = ARIMA(log(Index) ~ pdq(1,0,0) + PDQ(0,1,1)),
arima001110 = ARIMA(log(Index) ~ pdq(0,0,1) + PDQ(1,1,0)),
auto_arima1 = ARIMA(log(Index), stepwise = FALSE, approx = FALSE)
)
# Rank candidates by AICc; the automatic search wins.
glance(arima_all_models2) %>% arrange(AICc)
## # A tibble: 5 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 auto_arima1 0.000178 2468. -4923. -4923. -4889. <cpl [2]> <cpl [14]>
## 2 arima100011 0.000182 2459. -4909. -4909. -4890. <cpl [1]> <cpl [12]>
## 3 arima100110 0.000205 2410. -4812. -4812. -4793. <cpl [13]> <cpl [0]>
## 4 arima001011 0.000632 1931. -3853. -3853. -3834. <cpl [0]> <cpl [13]>
## 5 arima001110 0.000635 1929. -3849. -3849. -3830. <cpl [12]> <cpl [1]>
best_arima_model2 <- arima_all_models2 %>% select(auto_arima1)
report(best_arima_model2)
## Series: Index
## Model: ARIMA(2,0,2)(0,1,1)[12] w/ drift
## Transformation: log(Index)
##
## Coefficients:
## ar1 ar2 ma1 ma2 sma1 constant
## 1.7709 -0.7822 -0.9404 0.1615 -0.6200 1e-04
## s.e. 0.1370 0.1312 0.1382 0.0356 0.0297 0e+00
##
## sigma^2 estimated as 0.0001782: log likelihood=2468.36
## AIC=-4922.72 AICc=-4922.59 BIC=-4889.49
Best ARIMA model-Industrial Production: ARIMA(2,0,2)(0,1,1)[12] w/ drift
#looking at residuals
best_arima_model2 %>%
gg_tsresiduals()+ ggtitle("Residual analysis of Industrial Production Arima model")
#Doing Ljung box test on residuals
# NOTE(review): fitdf = 5 but the fitted model reports 6 estimated
# coefficients — confirm the intended degrees of freedom.
Box.test(augment(best_arima_model2)$.resid, lag=36, fitdf = 5, type = "Lj")
##
## Box-Ljung test
##
## data: augment(best_arima_model2)$.resid
## X-squared = 43.882, df = 31, p-value = 0.06247
Observations
Using ETS Model
# Candidate ETS specifications plus an automatic selection, on log(Index).
ets_all_models2 <- ip_train %>%
model(
AAdA = ETS(log(Index) ~ error("A") + trend("Ad") + season("A")),
AAA = ETS(log(Index) ~ error("A") + trend("A") + season("A")),
MAM = ETS(log(Index) ~ error("M") + trend("A") + season("M")),
MAdM = ETS(log(Index) ~ error("M") + trend("Ad") + season("M")),
ETS = ETS(log(Index))
)
accuracy(ets_all_models2)
## # A tibble: 5 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAdA Training 0.138 1.65 1.20 0.109 1.03 0.333 0.340 0.0947
## 2 AAA Training 0.00108 1.65 1.20 -0.00770 1.04 0.334 0.339 0.142
## 3 MAM Training 0.0127 1.71 1.27 0.00140 1.11 0.352 0.354 0.325
## 4 MAdM Training 0.153 1.64 1.21 0.122 1.04 0.335 0.339 0.158
## 5 ETS Training 0.134 1.65 1.20 0.104 1.03 0.334 0.340 0.0769
# Plot 4-year-ahead forecasts from all ETS candidates (recent years only).
ets_all_models2 %>%
forecast(h = "4 years") %>%
autoplot(industrial_prod_durable_goods %>% filter(year(Month)>2015)) +
labs(y = "Index", title = "Industrial Production: Durable goods")
Best ETS Model- Industrial Production: ETS(M,Ad,M)
best_ets_model2 <- ets_all_models2 %>%
select(MAdM)
best_ets_model2 %>%
gg_tsresiduals(lag_max = 24)+ggtitle("Residual analysis of Industrial production Durable: ETS model")
#Doing Ljung box test on residuals
# p < 2.2e-16 indicates the ETS residuals are still autocorrelated.
Box.test(augment(best_ets_model2)$.resid, lag=36, fitdf = 17, type = "Lj")
##
## Box-Ljung test
##
## data: augment(best_ets_model2)$.resid
## X-squared = 151.06, df = 19, p-value < 2.2e-16
Observations
Plotting both forecast
fc3 <- best_arima_model2 %>% forecast(h = "3 years")
fc4 <- best_ets_model2%>% forecast(h = "3 years")
i1<- fc3 %>%
autoplot(ip_train %>% filter(year(Month)>2015)) +
geom_line(data=(ip_test%>% filter(year(Month)>2015)), aes(x=Month, y=(Index)), col='red')+
ggtitle("Industrial Production: Durable Goods: Arima")+ylab("Index")
i2<- fc4 %>%
autoplot(ip_train%>% filter(year(Month)>2015)) +
geom_line(data=(ip_test%>% filter(year(Month)>2015)), aes(x=Month, y=(Index)), col='red')+
ggtitle("Industrial Production: Durable Goods: ETS")+ylab("Index")
grid.arrange(i1,i2,nrow=2)
# Generate forecasts and compare accuracy over the test set
bind_rows(
best_arima_model2 %>% accuracy(),
best_ets_model2 %>% accuracy(),
best_arima_model2 %>% forecast(h = "4 years") %>% accuracy(ip_test),
best_ets_model2%>% forecast(h = "4 years") %>% accuracy(ip_test)
) %>%
select(-ME, -MPE, -ACF1)
## # A tibble: 4 x 7
## .model .type RMSE MAE MAPE MASE RMSSE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 auto_arima1 Training 1.59 1.15 0.979 0.318 0.328
## 2 MAdM Training 1.64 1.21 1.04 0.335 0.339
## 3 auto_arima1 Test 12.7 7.35 5.04 NaN NaN
## 4 MAdM Test 11.5 5.35 3.79 NaN NaN
Since the RMSE of ETS(M,Ad,M) is the lowest on the test data, ETS(M,Ad,M) is the better model. We will use it to forecast industrial production of durable goods one year ahead.
3.3.Producer Price Index by Commodity: Durable Goods
Looking at time series
#plotting graphs to see our time series
g1<- PPI_durable_goods %>%
autoplot(Index) + ggtitle("Producer Price Index: Durable Goods") + ylab("Index")
g2<- PPI_durable_goods %>%
gg_season(Index) + ggtitle("Producer Price Index: Durable Goods") + ylab("Index")
grid.arrange(g1, g2, nrow = 2)
Observations for Producer Price Index by Commodities:Final Demand: Durable Goods
#Checking for different components in the series
# STL decomposition splits the series into trend, seasonal and remainder.
PPI_durable_goods %>%
model(
STL(Index)) %>%
components() %>%
autoplot()+
labs(title = "STL decomposition of PPI: Consumer goods")
Forecast: PPI-Durable goods
We will build two competing models — ARIMA and ETS — on the training data and, for every forecasting exercise, select the best-performing model by comparing their accuracy on the test data set.
Using ARIMA
#We will only take log transformations as we are looking into the growths
#Step 2: Checking for stationarity
PPI_durable_goods %>% ACF(Index) %>% autoplot()+ggtitle("ACF plot for PPI: Durable goods")
#Running KPSS to statistically justify that data is stationary or not and find order of the differencing - Null hypothesis (Ho): Data is stationary - Alternate hypothesis (Halt): Data is non-stationary
PPI_durable_goods %>% features(log(Index), unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 12.6 0.01
# p-value 0.01 rejects stationarity, so differencing is required.
#no. of seasonal difference - PPI
PPI_durable_goods %>% mutate(t_Index = log(Index)) %>%
features(t_Index, unitroot_nsdiffs)
## # A tibble: 1 x 1
## nsdiffs
## <int>
## 1 0
#no. of regular difference - PPI (no seasonal difference needed; two regular differences)
PPI_durable_goods %>% features(log(Index), unitroot_ndiffs)
## # A tibble: 1 x 1
## ndiffs
## <int>
## 1 2
PPI_durable_goods %>%
autoplot(log(Index)%>% difference() %>% difference())+
ggtitle("PPI: Durable Goods(Differenced timeseries)")+ylab("Index")
#KPSS on log transformed twice-differenced time series
PPI_durable_goods %>% mutate(d_Index = difference(difference(log(Index)))) %>% features(d_Index, unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.0118 0.1
# p-value 0.1 fails to reject stationarity: the differenced series is stationary.
#checking residuals of differenced time series
PPI_durable_goods %>%
gg_tsdisplay(log(Index) %>% difference() %>% difference(), plot_type="partial", lag_max = 36)+
ggtitle("Residuals of differenced PPI: Durable Goods")
ACF
PACF
The lags can be seen to be decaying exponentially. There is some negative partial autocorrelation at lag1, lag2, lag3 , lag4, and lag5. We can consider lag 1 and lag2 to be significant, which suggests AR(1) or AR(2) non-seasonal components
Suggested Model
#test data set (2019 Jan onwards)
ppi_test <- PPI_durable_goods %>%
filter_index("2019 Jan" ~.)
#training data set (everything up to 2018 Dec)
ppi_train <- PPI_durable_goods %>%
filter_index(~ "2018 Dec")
# Candidate non-seasonal ARIMA specifications (d = 2 per the unit-root tests)
# plus an automatic exhaustive search.
arima_all_models3 <- ppi_train %>%
model(
arima021= ARIMA(log(Index) ~ pdq(0,2,1) + PDQ(0,0,0)),
arima120 = ARIMA(log(Index) ~ pdq(1,2,0) + PDQ(0,0,0)),
arima022 = ARIMA(log(Index) ~ pdq(0,2,2) + PDQ(0,0,0)),
arima220= ARIMA(log(Index) ~ pdq(2,2,0) + PDQ(0,0,0)),
arima123= ARIMA(log(Index) ~ pdq(1,2,3) + PDQ(0,0,0)),
arima_model = ARIMA(log(Index), stepwise = FALSE, approx = FALSE)
)
# Rank candidates by AICc; the automatic search wins.
glance(arima_all_models3) %>% arrange(AICc)
## # A tibble: 6 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima_model 0.0000153 3545. -7077. -7076. -7043. <cpl [24]> <cpl [26]>
## 2 arima021 0.0000162 3520. -7036. -7036. -7026. <cpl [0]> <cpl [1]>
## 3 arima022 0.0000162 3521. -7035. -7035. -7021. <cpl [0]> <cpl [2]>
## 4 arima123 0.0000162 3522. -7034. -7034. -7010. <cpl [1]> <cpl [3]>
## 5 arima220 0.0000192 3448. -6889. -6889. -6875. <cpl [2]> <cpl [0]>
## 6 arima120 0.0000224 3380. -6756. -6756. -6746. <cpl [1]> <cpl [0]>
best_arima_model3 <- arima_all_models3 %>% select(arima_model)
report(best_arima_model3)
## Series: Index
## Model: ARIMA(0,2,2)(2,0,2)[12]
## Transformation: log(Index)
##
## Coefficients:
## ma1 ma2 sar1 sar2 sma1 sma2
## -0.9752 0.0855 0.7056 -0.4323 -0.812 0.3123
## s.e. 0.0343 0.0366 0.1447 0.1365 0.150 0.1499
##
## sigma^2 estimated as 1.53e-05: log likelihood=3545.25
## AIC=-7076.5 AICc=-7076.37 BIC=-7043.21
Best ARIMA model-PPI: ARIMA(0,2,2)(2,0,2)[12]
#looking at residuals
best_arima_model3 %>%
gg_tsresiduals()+ ggtitle("Residual analysis of PPI Arima model")
#Doing Ljung box test on residuals
# p-value 0.00015 rejects the null: residuals are still autocorrelated.
Box.test(augment(best_arima_model3)$.resid, lag=36, fitdf = 6, type = "Lj")
##
## Box-Ljung test
##
## data: augment(best_arima_model3)$.resid
## X-squared = 66.318, df = 30, p-value = 0.0001484
Observations
Using ETS Model
stl <- decomposition_model(
STL(Index),
ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))
)
ets_all_models3 <- ppi_train %>%
model(
snaive = SNAIVE(log(Index)),
AAdN = ETS(log(Index) ~ error("A") + trend("Ad") + season("N")),
AAN = ETS(log(Index) ~ error("A") + trend("A") + season("N")),
MAN = ETS(log(Index) ~ error("M") + trend("A") + season("N")),
MAdN = ETS(log(Index) ~ error("M") + trend("Ad") + season("N")),
ETS = ETS(log(Index)),
ETS1 = ETS(log(Index)),
stl_ets = stl
)
# Training-set accuracy for each candidate (stl_ets has the lowest RMSE).
accuracy(ets_all_models3)
## # A tibble: 8 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 snaive Training 1.81 2.72 1.95 2.16 2.34 1 1 0.964
## 2 AAdN Training 0.0183 0.381 0.233 0.0227 0.259 0.120 0.140 -0.148
## 3 AAN Training -0.00570 0.381 0.234 -0.00511 0.260 0.120 0.140 -0.148
## 4 MAN Training -0.00511 0.384 0.235 -0.00468 0.260 0.120 0.141 -0.180
## 5 MAdN Training 0.0174 0.383 0.234 0.0219 0.259 0.120 0.141 -0.176
## 6 ETS Training 0.0183 0.381 0.233 0.0227 0.259 0.120 0.140 -0.148
## 7 ETS1 Training 0.0183 0.381 0.233 0.0227 0.259 0.120 0.140 -0.148
## 8 stl_ets Training 0.0317 0.362 0.231 0.0404 0.258 0.118 0.133 0.00915
ets_all_models3 %>%
forecast(h = "6 years") %>%
autoplot(PPI_durable_goods) +
labs(y = "Index", title = "PPI: Durable goods")
Best ETS Model- PPI Durable goods: ETS(A,Ad,N) on seasonally adjusted data-stl_ets
best_ets_model3 <- ets_all_models3 %>%
select(stl_ets)
best_ets_model3 %>%
gg_tsresiduals(lag_max = 24)+ggtitle("Residual analysis of PPI Durable: ETS model")
#Doing Ljung box test on residuals
# p < 2.2e-16 indicates the ETS residuals are still autocorrelated.
Box.test(augment(best_ets_model3)$.resid, lag=36, fitdf = 5, type = "Lj")
##
## Box-Ljung test
##
## data: augment(best_ets_model3)$.resid
## X-squared = 161.86, df = 31, p-value < 2.2e-16
Observations
Plotting both forecasts
# Five-year forecasts from the best ARIMA and best ETS models.
fc5 <- best_arima_model3 %>% forecast(h = "5 years")
fc6 <- best_ets_model3 %>% forecast(h = "5 years")
# Plot each forecast over the post-2000 training data, with the held-out
# test observations overlaid in red for visual comparison.
p1<- fc5 %>%
autoplot(ppi_train %>% filter(year(Month)>2000)) +
geom_line(data=(ppi_test%>% filter(year(Month)>2000)), aes(x=Month, y=Index), col='red')+
ggtitle("PPI: Durable Goods: ARIMA")+ylab("Index")
p2<- fc6 %>%
autoplot(ppi_train%>% filter(year(Month)>2000)) +
geom_line(data=(ppi_test%>% filter(year(Month)>2000)), aes(x=Month, y=Index), col='red')+
ggtitle("PPI: Durable Goods: ETS")+ylab("Index")
grid.arrange(p1,p2,ncol=2)
# Compare training accuracy with test-set accuracy over a 4-year horizon.
# NOTE(review): the plots above use 5-year forecasts while accuracy is
# scored on 4 years — confirm the horizon mismatch is intentional.
# NOTE(review): MASE/RMSSE print as NaN on the test rows, presumably
# because accuracy() receives only the test slice and cannot recover the
# training-set scaling — verify against the fabletools documentation.
bind_rows(
best_arima_model3 %>% accuracy(),
best_ets_model3 %>% accuracy(),
best_arima_model3 %>% forecast(h = "4 years") %>% accuracy(ppi_test),
best_ets_model3 %>% forecast(h = "4 years") %>% accuracy(ppi_test)
) %>%
select(-ME, -MPE, -ACF1)
## # A tibble: 4 x 7
## .model .type RMSE MAE MAPE MASE RMSSE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima_model Training 0.370 0.232 0.259 0.119 0.136
## 2 stl_ets Training 0.362 0.231 0.258 0.118 0.133
## 3 arima_model Test 4.72 2.88 1.65 NaN NaN
## 4 stl_ets Test 5.52 3.25 1.85 NaN NaN
Clearly, the ARIMA model performed better than ETS on the test data here, as the RMSE of the ARIMA model is lower. So we will use the ARIMA model to forecast PPI.
3.4.Consumer Price Index for all Urban Consumers: Durable Goods
Looking at time series
# Level plot and seasonal plot of the CPI series, stacked vertically.
g1 <- autoplot(CPI_durable_goods, Index) +
  ggtitle("Consumer Price Index: Durable Goods")
g2 <- gg_season(CPI_durable_goods, Index) +
  ggtitle("Consumer Price Index: Durable Goods")
grid.arrange(g1, g2, nrow = 2)
Observations for Consumer Price Index for all Urban Consumers: Durable Goods
Forecast: CPI-durable goods
We will build two competitive models : ARIMA and ETS on training data and check the best performing model on test data set for every forecasting exercise we will do by looking at its accuracy
Using ARIMA Model
# Inspect the log-transformed series; the log stabilises variance that
# grows with the level of the index.
CPI_durable_goods %>%
autoplot(log(Index)) +
labs(y = "", title = "Log Transformed Consumer Price Index for Durable Goods")
# Step 2: check for stationarity via the ACF.
CPI_durable_goods %>% ACF(Index) %>% autoplot()+ggtitle("ACF plot for CPI: Durable goods")
# KPSS unit-root test to decide whether differencing is needed.
# H0: the series is stationary; Ha: the series is non-stationary.
CPI_durable_goods %>% features(log(Index), unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 9.45 0.01
# Number of seasonal differences required for log(CPI) — zero here.
CPI_durable_goods %>% mutate(t_Index = log(Index)) %>%
features(t_Index, unitroot_nsdiffs)
## # A tibble: 1 x 1
## nsdiffs
## <int>
## 1 0
# Number of regular (first) differences required — two here.
# (The original comment said "industrial production"; this chunk is CPI.)
CPI_durable_goods %>% features(log(Index), unitroot_ndiffs)
## # A tibble: 1 x 1
## ndiffs
## <int>
## 1 2
# Plot the twice-differenced log series, which should now look stationary.
CPI_durable_goods %>%
autoplot(log(Index)%>% difference() %>% difference())+
ggtitle("CPI: Durable Goods(Differenced timeseries)")+ylab("Index")
# KPSS on the log-transformed, twice-differenced series: the test no longer
# rejects stationarity (p-value capped at 0.1). Note the transform is a
# log, not Box-Cox as an earlier comment suggested.
CPI_durable_goods %>% mutate(d_Index = difference(difference(log(Index)))) %>% features(d_Index, unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.00749 0.1
# ACF/PACF of the differenced series to suggest candidate ARIMA orders.
CPI_durable_goods %>%
gg_tsdisplay(log(Index) %>% difference() %>% difference(), plot_type="partial", lag_max = 36)+
ggtitle("Residuals of differenced CPI: Durable Goods")
ACF
PACF
The lags can be seen to be decaying exponentially.
Suggested Model: ARIMA(0,2,1) or ARIMA(1,2,1) or ARIMA(0,2,2)
# Hold out Jan 2019 onward as the test set; train on everything through
# Dec 2018.
cpi_test <- CPI_durable_goods %>%
filter_index("2019 Jan" ~.)
cpi_train <- CPI_durable_goods %>%
filter_index(~ "2018 Dec")
# Candidate ARIMA specifications suggested by the ACF/PACF, plus a full
# automatic search (stepwise/approximation disabled for a wider search).
arima_all_models4 <- cpi_train %>%
model(
arima021 = ARIMA(log(Index) ~ pdq(0,2,1) + PDQ(0,0,0)),
arima121 = ARIMA(log(Index) ~ pdq(1,2,1) + PDQ(0,0,0)),
arima120 = ARIMA(log(Index) ~ pdq(1,2,0) + PDQ(0,0,0)),
arima022 = ARIMA(log(Index) ~ pdq(0,2,2) + PDQ(0,0,0)),
arima_model = ARIMA(log(Index), stepwise = FALSE, approx = FALSE)
)
# Rank candidates by AICc (lower is better).
glance(arima_all_models4) %>% arrange(AICc)
## # A tibble: 5 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima_model 0.00000572 3485. -6959. -6959. -6936. <cpl [1]> <cpl [25]>
## 2 arima121 0.00000585 3475. -6945. -6945. -6931. <cpl [1]> <cpl [1]>
## 3 arima022 0.00000591 3471. -6936. -6936. -6923. <cpl [0]> <cpl [2]>
## 4 arima021 0.00000624 3451. -6898. -6898. -6888. <cpl [0]> <cpl [1]>
## 5 arima120 0.00000707 3404. -6803. -6803. -6794. <cpl [1]> <cpl [0]>
best_arima_model4 <- arima_all_models4 %>% select(arima_model)
report(best_arima_model4)
## Series: Index
## Model: ARIMA(1,2,1)(0,0,2)[12]
## Transformation: log(Index)
##
## Coefficients:
## ar1 ma1 sma1 sma2
## 0.2991 -0.8943 -0.1252 -0.1167
## s.e. 0.0439 0.0212 0.0371 0.0377
##
## sigma^2 estimated as 5.721e-06: log likelihood=3484.56
## AIC=-6959.13 AICc=-6959.05 BIC=-6936
Best ARIMA model-CPI: ARIMA(1,2,1)(0,0,2)[12]
# Residual diagnostics for the selected CPI ARIMA model.
best_arima_model4 %>%
gg_tsresiduals()+ ggtitle("Residual analysis of CPI Arima model")
# Ljung-Box test on the residuals. H0: residuals are white noise.
# FIX: ARIMA(1,2,1)(0,0,2)[12] has 4 estimated coefficients
# (ar1, ma1, sma1, sma2), so fitdf must be 4, not 2. With fitdf = 2 the
# test used df = 36 - 2 = 34; re-knitting will report df = 32.
Box.test(augment(best_arima_model4)$.resid, lag=36, fitdf = 4, type = "Lj")
##
## Box-Ljung test
##
## data: augment(best_arima_model4)$.resid
## X-squared = 99.814, df = 34, p-value = 2.177e-08
Observations
Using ETS model
# STL decomposition model for CPI: ETS(A,Ad,N) on the seasonally adjusted
# component of log(Index), reseasonalised with the STL seasonal term.
stl2 <- decomposition_model(
STL(log(Index)),
ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))
)
# Candidate ETS-family models for CPI on the log scale.
ets_all_models4 <- cpi_train %>%
model(AAdN = ETS(log(Index) ~ error("A") + trend("Ad") + season("N")),
AAN = ETS(log(Index) ~ error("A") + trend("A") + season("N")),
MAN = ETS(log(Index) ~ error("M") + trend("A") + season("N")),
MAdN = ETS(log(Index) ~ error("M") + trend("Ad") + season("N")),
stl_ets = stl2,
ETS = ETS(log(Index))
)
# Training-set accuracy for all candidates.
accuracy(ets_all_models4)
## # A tibble: 6 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAdN Training 0.00851 0.218 0.155 0.0143 0.179 0.0741 0.0779 0.222
## 2 AAN Training -0.00234 0.221 0.157 -0.000505 0.181 0.0752 0.0789 0.274
## 3 MAN Training -0.00244 0.221 0.157 -0.000505 0.181 0.0752 0.0789 0.281
## 4 MAdN Training 0.00793 0.218 0.156 0.0139 0.180 0.0746 0.0781 0.260
## 5 stl_ets Training 0.00787 0.208 0.151 0.0130 0.172 0.0721 0.0744 0.213
## 6 ETS Training 0.00851 0.218 0.155 0.0143 0.179 0.0741 0.0779 0.222
Best ETS Model-CPI: Durable goods: ETS(A,Ad,N) and stl decomposition ETS(A,Ad,N)
# Keep the two best ETS candidates: plain ETS(A,Ad,N) and the STL+ETS
# decomposition model.
best_ets_model4_1 <- ets_all_models4 %>%
select(AAdN)
best_ets_model4_2 <- ets_all_models4 %>%
select(stl_ets)
# Residual diagnostics for ETS(A,Ad,N).
best_ets_model4_1 %>%
gg_tsresiduals(lag_max = 24)+ggtitle("Residual analysis of CPI Durable: ETS model")
# Ljung-Box test; fitdf = 5 covers the estimated ETS(A,Ad,N) parameters.
Box.test(augment(best_ets_model4_1)$.resid, lag=36, fitdf = 5, type = "Lj")
##
## Box-Ljung test
##
## data: augment(best_ets_model4_1)$.resid
## X-squared = 316.24, df = 31, p-value < 2.2e-16
# Residual diagnostics for the STL+ETS model.
best_ets_model4_2 %>%
gg_tsresiduals(lag_max = 24)+ggtitle("Residual analysis of CPI Durable: ETS model")
# Ljung-Box test for the STL+ETS residuals.
Box.test(augment(best_ets_model4_2)$.resid, lag=36, fitdf = 5, type = "Lj")
##
## Box-Ljung test
##
## data: augment(best_ets_model4_2)$.resid
## X-squared = 403.47, df = 31, p-value < 2.2e-16
Observations
Plotting both forecasts
# Five-year forecasts from the best ARIMA and both ETS candidates.
fc7 <- best_arima_model4 %>% forecast(h = "5 years")
fc8_1 <- best_ets_model4_1 %>% forecast(h = "5 years")
fc8_2 <- best_ets_model4_2 %>% forecast(h = "5 years")
# Plot each forecast over the training data with the test set in red.
c1<- fc7 %>%
autoplot(cpi_train ) +
geom_line(data=cpi_test, aes(x=Month, y=Index), col='red')+
ggtitle("CPI: Durable Goods: ARIMA")+ylab("Index")
c2<- fc8_1 %>%
autoplot(cpi_train) +
geom_line(data=cpi_test, aes(x=Month, y=Index), col='red')+
ggtitle("CPI: Durable Goods: ETS")+ylab("Index")
c3<- fc8_2 %>%
autoplot(cpi_train) +
geom_line(data=cpi_test, aes(x=Month, y=Index), col='red')+
ggtitle("CPI: Durable Goods: ETS")+ylab("Index")
grid.arrange(c1,c2,c3, nrow =3)
# Compare training accuracy with 3-year test-set accuracy for all three
# models; drop columns not used in the comparison.
bind_rows(
best_arima_model4 %>% accuracy(),
best_ets_model4_1 %>% accuracy(),
best_ets_model4_2 %>% accuracy(),
best_arima_model4 %>% forecast(h = "3 years") %>% accuracy(cpi_test),
best_ets_model4_1 %>% forecast(h = "3 years") %>% accuracy(cpi_test),
best_ets_model4_2 %>% forecast(h = "3 years") %>% accuracy(cpi_test)
) %>%
select(-ME, -MPE, -ACF1)
## # A tibble: 6 x 7
## .model .type RMSE MAE MAPE MASE RMSSE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima_model Training 0.204 0.145 0.171 0.0694 0.0731
## 2 AAdN Training 0.218 0.155 0.179 0.0741 0.0779
## 3 stl_ets Training 0.208 0.151 0.172 0.0721 0.0744
## 4 arima_model Test 7.88 4.70 3.87 NaN NaN
## 5 AAdN Test 6.35 3.93 3.27 NaN NaN
## 6 stl_ets Test 6.59 4.02 3.33 NaN NaN
Clearly, the ETS(A,Ad,N) model performed better than ARIMA on the test data here, as the RMSE of the ETS model is lower. So we will use the ETS model to forecast CPI.
3.5.Real Disposable Income
Looking into the series
# Build the RDPI tsibble: copy DPIC96 into Income, keep Quarter + Income.
rdpi <- data_rdpi %>% mutate(Income = DPIC96) %>% select(Quarter,Income)
# Level, seasonal, and subseries plots of real disposable income.
g1<- rdpi %>%
autoplot(Income) + ggtitle("Real personal disposable income")
g2<- rdpi %>%
gg_season(Income) + ggtitle("Real personal disposable income")
g3<- rdpi %>%
gg_subseries(Income) + ggtitle("Real personal disposable income")
grid.arrange(g1, g2,g3, nrow = 3)
Observations for Real Disposable Personal Income
Forecast: real disposable income
We will build two competitive models : ARIMA and ETS on training data and check the best performing model on test data set for every forecasting exercise we will do by looking at its accuracy
Using ARIMA Model
# Check stationarity via the ACF of the raw Income series.
rdpi %>% ACF(Income) %>% autoplot()+ggtitle("ACF plot for Real Disposable income")
# KPSS unit-root test. H0: series is stationary; Ha: non-stationary.
rdpi %>% features(Income, unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 4.94 0.01
# Number of seasonal differences needed for Income (zero).
# The identity mutate(t_Income = Income) in the original added nothing,
# so the feature is computed on Income directly; the result is identical.
rdpi %>% features(Income, unitroot_nsdiffs)
## # A tibble: 1 x 1
## nsdiffs
## <int>
## 1 0
# Number of regular differences needed for Income (two).
rdpi %>% features(Income, unitroot_ndiffs)
## # A tibble: 1 x 1
## ndiffs
## <int>
## 1 2
# Plot the twice-differenced Income series.
rdpi %>%
autoplot(Income %>% difference()%>%difference())+
ggtitle("Real Disposable Personal Income(Differenced timeseries)")
# KPSS on the twice (regular) differenced series — now stationary.
# (No Box-Cox transform and no seasonal difference are applied here,
# despite what the original comment said.)
rdpi %>% mutate(t_Income = difference(difference(Income))) %>%
features(t_Income, unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.0128 0.1
# ACF/PACF of the differenced series for ARIMA order selection.
rdpi %>%
gg_tsdisplay(Income %>% difference()%>% difference(), plot_type="partial")+
ggtitle("Residuals of differenced RDPI")
ACF plot
PACF plot
# Hold out 2019 Q1 onward as test data.
rdpi_test <- rdpi %>%
filter_index("2019 Q1" ~.)
# Train on everything through 2018 Q4.
rdpi_train <- rdpi %>%
filter_index(~ "2018 Q4")
# Candidate ARIMA specifications plus an exhaustive automatic search.
arima_all_models_rdpi <- rdpi_train %>%
model(
arima023 = ARIMA(Income ~ pdq(0,2,3) + PDQ(0,0,0)),
arima021 = ARIMA(Income ~ pdq(0,2,1) + PDQ(0,0,0)),
arima220 = ARIMA(Income ~ pdq(2,2,0) + PDQ(0,0,0)),
auto_arima = ARIMA(Income, stepwise = FALSE, approx = FALSE)
)
# Rank candidates by AICc.
glance(arima_all_models_rdpi) %>% arrange(AICc)
## # A tibble: 4 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 auto_arima 4160. -1597. 3204. 3204. 3222. <cpl [4]> <cpl [3]>
## 2 arima023 4179. -1598. 3204. 3204. 3219. <cpl [0]> <cpl [3]>
## 3 arima021 4541. -1611. 3226. 3226. 3233. <cpl [0]> <cpl [1]>
## 4 arima220 5143. -1627. 3261. 3261. 3271. <cpl [2]> <cpl [0]>
report(arima_all_models_rdpi %>% select(auto_arima))
## Series: Income
## Model: ARIMA(0,2,3)(1,0,0)[4]
##
## Coefficients:
## ma1 ma2 ma3 sar1
## -1.1815 0.4197 -0.1961 -0.0966
## s.e. 0.0585 0.0871 0.0592 0.0643
##
## sigma^2 estimated as 4160: log likelihood=-1596.9
## AIC=3203.81 AICc=3204.02 BIC=3222.09
Best ARIMA model-RDPI: ARIMA(0,2,3)
# Keep the plain ARIMA(0,2,3): its AICc is nearly identical to the
# automatic model's and it is more parsimonious (no seasonal term).
best_arima_model_rdpi<- arima_all_models_rdpi %>% select(arima023)
# Residual diagnostics for the chosen model.
best_arima_model_rdpi %>%
gg_tsresiduals()+ ggtitle("Residual analysis of RDPI Arima model")
# Ljung-Box test; fitdf = 3 matches the 3 MA coefficients of ARIMA(0,2,3).
Box.test(augment(best_arima_model_rdpi)$.resid, lag=36, fitdf = 3, type = "Lj")
##
## Box-Ljung test
##
## data: augment(best_arima_model_rdpi)$.resid
## X-squared = 71.61, df = 33, p-value = 0.0001131
Observations
Using ETS model
# STL decomposition model: ETS(A,A,N) on the seasonally adjusted Income.
stl_rd <- decomposition_model(
STL(Income),
ETS(season_adjust ~ error("A") + trend("A") + season("N"))
)
# Candidate ETS-family models for RDPI.
ets_all_models_rd <- rdpi_train %>%
model(
AAdN = ETS(Income ~ error("A") + trend("Ad") + season("N")),
AAN = ETS(Income ~ error("A") + trend("A") + season("N")),
stl_ets = stl_rd,
ETS_auto = ETS(Income)
)
# Training-set accuracy for all candidates.
accuracy(ets_all_models_rd)
## # A tibble: 4 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAdN Training 10.5 66.7 40.2 0.174 0.690 0.211 0.286 -0.0601
## 2 AAN Training 6.01 65.5 39.4 0.0981 0.681 0.206 0.281 -0.0418
## 3 stl_ets Training 5.89 62.1 39.0 0.107 0.651 0.204 0.267 -0.0338
## 4 ETS_auto Training 5.90 66.2 38.6 0.0996 0.665 0.202 0.284 -0.187
Best ETS model-RDPI: Decomposition model with additive trend : stl_ets with lowest RMSE
# Keep the two leading ETS candidates for test-set comparison.
best_ets_model_rd_1 <- ets_all_models_rd %>%
select(stl_ets)
best_ets_model_rd_2 <- ets_all_models_rd %>%
select(AAN)
# Compare training accuracy with 4-year test-set accuracy for the ARIMA
# model and both ETS candidates.
bind_rows(
best_arima_model_rdpi %>% accuracy(),
best_ets_model_rd_1 %>% accuracy(),
best_ets_model_rd_2 %>% accuracy(),
best_arima_model_rdpi %>% forecast(h = "4 years") %>% accuracy(rdpi_test),
best_ets_model_rd_1 %>% forecast(h = "4 years") %>% accuracy(rdpi_test),
best_ets_model_rd_2 %>% forecast(h = "4 years") %>% accuracy(rdpi_test)
) %>%
select(-ME, -MPE, -ACF1)
## # A tibble: 6 x 7
## .model .type RMSE MAE MAPE MASE RMSSE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima023 Training 64.1 40.3 0.687 0.211 0.275
## 2 stl_ets Training 62.1 39.0 0.651 0.204 0.267
## 3 AAN Training 65.5 39.4 0.681 0.206 0.281
## 4 arima023 Test 703. 423. 2.59 NaN NaN
## 5 stl_ets Test 699. 421. 2.58 NaN NaN
## 6 AAN Test 694. 419. 2.57 NaN NaN
Since the RMSE of ETS(A,A,N) is the lowest on the test data, ETS(A,A,N) is the better model. We will forecast real disposable personal income one year ahead using the ETS(A,A,N) model.
4.1.Real Personal Consumption Expenditure: Non Durable Goods
Looking at time series
# Level, seasonal, and subseries plots of RPCE non-durables (% of GDP).
g1<- rpce_ndurable_goods_gdp %>%
autoplot(Expenditure) + ggtitle("Real personal consumption Expenditure: Non Durable Goods") + ylab("% of GDP")
g2<- rpce_ndurable_goods_gdp %>%
gg_season(Expenditure) + ggtitle("Real personal consumption Expenditure: Non Durable Goods")+ylab("% of GDP")
g3<- rpce_ndurable_goods_gdp %>%
gg_subseries(Expenditure) + ggtitle("Real personal consumption Expenditure: Non Durable Goods")+ylab("% of GDP")
grid.arrange(g1, g2,g3, nrow = 3)
# STL decomposition to inspect trend, seasonal, and remainder components.
rpce_ndurable_goods_gdp %>%
model(
STL(Expenditure)) %>%
components() %>%
autoplot()+
labs(title = "STL decomposition of RPCE: Non durable goods")
Forecast: RPCE
#Checking for stationary
rpce_ndurable_goods_gdp %>% ACF(Expenditure) %>% autoplot()+ggtitle("ACF plot for Real Personal Consumption Expenditure: Non-Durable goods")
Since the data has both trend and seasonality, the series is non-stationary.
#no. of seasonal difference
rpce_ndurable_goods_gdp %>% mutate(t_Expenditure = Expenditure) %>%
features(t_Expenditure, unitroot_nsdiffs)
## # A tibble: 1 x 1
## nsdiffs
## <int>
## 1 1
#no. of regular difference
rpce_ndurable_goods_gdp %>% mutate(d_Expenditure = difference(Expenditure,4)) %>% features(d_Expenditure, unitroot_ndiffs)
## # A tibble: 1 x 1
## ndiffs
## <int>
## 1 1
# Plot the seasonally (lag 4) then regularly differenced series.
# FIX: corrected the "NOn-Durable" typo in the plot title.
rpce_ndurable_goods_gdp %>%
autoplot(Expenditure%>% difference(lag=4)%>% difference())+
ggtitle("Real personal consumption Expenditure: Non-Durable Goods(Differenced timeseries)")+ylab("% of GDP")
# KPSS on the seasonally-then-regularly differenced series. (No Box-Cox
# transform is applied here, despite what the original comment said.)
rpce_ndurable_goods_gdp %>% mutate(t_Expenditure = difference(difference(Expenditure,4))) %>%
features(t_Expenditure, unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.0346 0.1
# ACF/PACF of the differenced series for order selection.
rpce_ndurable_goods_gdp %>%
gg_tsdisplay(Expenditure %>% difference(lag=4)%>%difference(), plot_type="partial")+
ggtitle("Residuals of differenced RPCE: Non-Durable Goods")
# Hold out 2020 Q1 onward as test data.
nrpce_test <- rpce_ndurable_goods_gdp %>%
filter_index("2020 Q1" ~.)
# Train on everything through 2019 Q4.
nrpce_train <- rpce_ndurable_goods_gdp %>%
filter_index(~ "2019 Q4")
# Automatic ARIMA search only for this series.
arima_models1 <- nrpce_train %>%
model(auto_arima = ARIMA(Expenditure, stepwise = FALSE, approx = FALSE)
)
# Model fit statistics.
glance(arima_models1) %>% arrange(AICc)
## # A tibble: 1 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 auto_arima 0.0181 40.6 -75.2 -74.9 -68.6 <cpl [5]> <cpl [0]>
# Candidate ETS models for RPCE non-durables (level scale).
ets_models1 <- nrpce_train %>%
model(
ANA = ETS(Expenditure ~ error("A") + trend("N") + season("A")),
MNM = ETS(Expenditure ~ error("M") + trend("N") + season("M")),
ETS_auto = ETS(Expenditure)
)
# Training-set accuracy; ETS_auto selects the same ETS(M,N,M) spec as MNM
# (identical accuracy rows below and in the later report()).
accuracy(ets_models1)
## # A tibble: 3 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ANA Training -0.00721 0.138 0.105 -0.0446 0.661 0.745 0.792 0.216
## 2 MNM Training -0.00811 0.137 0.105 -0.0503 0.661 0.747 0.790 0.203
## 3 ETS_auto Training -0.00811 0.137 0.105 -0.0503 0.661 0.747 0.790 0.203
# Keep the automatically selected ARIMA model and report its coefficients.
best_arima_model1_nd <- arima_models1 %>% select(auto_arima)
report(best_arima_model1_nd)
## Series: Expenditure
## Model: ARIMA(1,0,0)(1,1,0)[4]
##
## Coefficients:
## ar1 sar1
## 0.6352 -0.2131
## s.e. 0.0923 0.1238
##
## sigma^2 estimated as 0.01812: log likelihood=40.61
## AIC=-75.23 AICc=-74.85 BIC=-68.57
Best ARIMA model-RPCE: ARIMA(1,0,0)(1,1,0)[4]
#looking at residuals
best_arima_model1_nd %>%
gg_tsresiduals()+ ggtitle("Residual analysis of RPCE Arima model")
#Doing Ljung box test on residuals
Box.test(augment(best_arima_model1_nd)$.resid, lag=36, fitdf = 2, type = "Lj")
##
## Box-Ljung test
##
## data: augment(best_arima_model1_nd)$.resid
## X-squared = 17.636, df = 34, p-value = 0.9907
Observations
Best ETS model-PRCE: ETS_auto
nbest_ets_model1 <- ets_models1 %>%
select(ETS_auto)
report(nbest_ets_model1)
## Series: Expenditure
## Model: ETS(M,N,M)
## Smoothing parameters:
## alpha = 0.442619
## gamma = 0.5573808
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3]
## 15.99234 1.076115 0.977718 0.9838233 0.9623433
##
## sigma^2: 1e-04
##
## AIC AICc BIC
## 33.61227 35.36227 49.54893
nbest_ets_model1 %>%
gg_tsresiduals(lag_max = 24)+ggtitle("Residual analysis of RPCE Non Durable: ETS model")
Observations
Selecting the best model for RPCE: Non-Durable goods (ARIMA or ETS)
# Generate forecasts and compare accuracy over the test set
bind_rows(
best_arima_model1_nd %>% accuracy(),
nbest_ets_model1 %>% accuracy(),
best_arima_model1_nd %>% forecast(h = "4 years") %>% accuracy(nrpce_test),
nbest_ets_model1 %>% forecast(h = "4 years") %>% accuracy(nrpce_test)
) %>%
select(-ME, -MPE, -ACF1)
## # A tibble: 4 x 7
## .model .type RMSE MAE MAPE MASE RMSSE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 auto_arima Training 0.129 0.102 0.644 0.725 0.741
## 2 ETS_auto Training 0.137 0.105 0.661 0.747 0.790
## 3 auto_arima Test 1.38 1.34 7.88 NaN NaN
## 4 ETS_auto Test 1.36 1.31 7.72 NaN NaN
So, since RMSE of ETS(M,N,M) comes out to be lowest on test data. Hence, ETS(M,N,M) is a better model. We will forecast the real personal consumption expenditure as % of GDP for 1 year ahead by using ETS(M,N,M) model
4.2.Industrial Production: Non-Durable Goods
Looking at time series
#plotting graphs to see our time series
g1<- industrial_prod_ndurable_goods %>%
autoplot(Index) + ggtitle("Industrial Production: Non- Durable Goods") + ylab("Index")
g2<- industrial_prod_ndurable_goods %>%
gg_season(Index) + ggtitle("Industrial Production: Non - Durable Goods") + ylab("Index")
grid.arrange(g1, g2, nrow = 2)
#Checking for different components in the series
industrial_prod_ndurable_goods %>%
model(
STL(Index)) %>%
components() %>%
autoplot()+
labs(title = "STL decomposition of Industrial Production: Non-Durable goods")
Forecast: Industrial production
#transform the data and stabilize as there is variation in the series which is increasing with time
industrial_prod_ndurable_goods %>%
autoplot(log(Index)) +
labs(y = "", title = "Log Transformed Industrial Production for Non- Durable Goods")
#Step 2: Checking for stationary
industrial_prod_ndurable_goods %>% ACF(Index) %>% autoplot()+ ggtitle("ACF plot for Industrial production: Non- Durable goods")
#Running KPSS to statistically justify that data is stationary or not and find order of the differincing - Null hypothesis (Ho): Data is stationary - Alternate hypothesis (Halt): Data is non-stationary
industrial_prod_ndurable_goods %>% features(log(Index), unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 11.9 0.01
#no. of seasonal difference - industrial production
industrial_prod_ndurable_goods %>% mutate(t_Index = log(Index)) %>%
features(t_Index, unitroot_nsdiffs)
## # A tibble: 1 x 1
## nsdiffs
## <int>
## 1 1
#no. of regular difference - industrial production
industrial_prod_ndurable_goods %>% mutate(d_Index = difference(log(Index),12)) %>% features(d_Index, unitroot_ndiffs)
## # A tibble: 1 x 1
## ndiffs
## <int>
## 1 1
industrial_prod_ndurable_goods %>%
autoplot(log(Index)%>% difference(lag=12)%>%difference())+
ggtitle("Industrial Production: Non- Durable Goods(Differenced timeseries)")+ylab("Index")
#KPSS on box-cox transformed differenced time series
industrial_prod_ndurable_goods %>% mutate(d_Index = difference(log(Index),12)%>%difference()) %>% features(d_Index, unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.00984 0.1
#checking residuals of differenced time series
industrial_prod_ndurable_goods %>%
gg_tsdisplay(log(Index) %>% difference(lag=12)%>% difference(), plot_type="partial")+
ggtitle("Residuals of differenced Industrial Production: Non- Durable Goods")
ACF
PACF
We can see negative spikes at lag 12 and lag 42. These are seasonal spikes, and they appear to decay exponentially over time, which suggests an AR(1) or AR(2) seasonal component.
A significant partial autocorrelation is seen at lag 1. This is a non-seasonal spike, which suggests an AR(1) non-seasonal component.
Models suggested:
# Hold out Jan 2019 onward as test data.
nip_test <- industrial_prod_ndurable_goods %>%
filter_index("2019 Jan" ~.)
# Train on everything through Dec 2018.
nip_train <- industrial_prod_ndurable_goods %>%
filter_index(~ "2018 Dec")
# Candidate seasonal ARIMA specifications from the ACF/PACF, plus an
# exhaustive automatic search.
arima_models2 <- nip_train %>%
model(
arima110110 = ARIMA(log(Index) ~ pdq(1,1,0) + PDQ(1,1,0)),
arima110210 = ARIMA(log(Index) ~ pdq(1,1,0) + PDQ(2,1,0)),
arima110011 = ARIMA(log(Index) ~ pdq(1,1,0) + PDQ(0,1,1)),
arima011012 = ARIMA(log(Index) ~ pdq(0,1,1) + PDQ(0,1,2)),
auto_arima1 = ARIMA(log(Index), stepwise = FALSE, approx = FALSE)
)
# Rank candidates by AICc.
glance(arima_models2) %>% arrange(AICc)
## # A tibble: 5 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 auto_arima1 0.0000608 2924. -5837. -5837. -5813. <cpl [1]> <cpl [14]>
## 2 arima011012 0.0000619 2916. -5823. -5823. -5804. <cpl [0]> <cpl [25]>
## 3 arima110011 0.0000622 2913. -5820. -5820. -5806. <cpl [1]> <cpl [12]>
## 4 arima110210 0.0000638 2903. -5799. -5799. -5780. <cpl [25]> <cpl [0]>
## 5 arima110110 0.0000696 2867. -5728. -5728. -5714. <cpl [13]> <cpl [0]>
nbest_arima_model2 <- arima_models2 %>% select(auto_arima1)
report(nbest_arima_model2)
## Series: Index
## Model: ARIMA(1,1,2)(0,1,1)[12]
## Transformation: log(Index)
##
## Coefficients:
## ar1 ma1 ma2 sma1
## 0.9121 -1.1226 0.1441 -0.5943
## s.e. 0.0268 0.0470 0.0411 0.0297
##
## sigma^2 estimated as 6.081e-05: log likelihood=2923.58
## AIC=-5837.16 AICc=-5837.09 BIC=-5813.43
Best ARIMA model-Industrial Production: ARIMA(1,1,2)(0,1,1)[12]
# Residual diagnostics for the selected industrial-production ARIMA model.
nbest_arima_model2 %>%
gg_tsresiduals()+ ggtitle("Residual analysis of Industrial Production Arima model")
# Ljung-Box test; fitdf = 4 matches the 4 estimated coefficients of
# ARIMA(1,1,2)(0,1,1)[12] (ar1, ma1, ma2, sma1), giving df = 36 - 4 = 32.
Box.test(augment(nbest_arima_model2)$.resid, lag=36, fitdf = 4, type = "Lj")
##
## Box-Ljung test
##
## data: augment(nbest_arima_model2)$.resid
## X-squared = 48.106, df = 32, p-value = 0.03363
Observations
Using ETS Model
# Candidate seasonal ETS models for industrial production (log scale).
ets_models2 <- nip_train %>%
model(
AAdA = ETS(log(Index) ~ error("A") + trend("Ad") + season("A")),
AAA = ETS(log(Index) ~ error("A") + trend("A") + season("A")),
MAM = ETS(log(Index) ~ error("M") + trend("A") + season("M")),
MAdM = ETS(log(Index) ~ error("M") + trend("Ad") + season("M")),
ETS = ETS(log(Index))
)
# Training-set accuracy for all candidates.
accuracy(ets_models2)
## # A tibble: 5 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAdA Training 0.0882 0.989 0.746 0.107 0.777 0.438 0.474 0.535
## 2 AAA Training -0.0531 0.919 0.684 -0.0404 0.707 0.402 0.441 0.446
## 3 MAM Training -0.0746 0.926 0.687 -0.0744 0.712 0.403 0.444 0.476
## 4 MAdM Training 0.156 0.958 0.730 0.193 0.756 0.429 0.459 0.432
## 5 ETS Training -0.0738 0.903 0.673 -0.0616 0.689 0.395 0.433 0.360
Best ETS Model- Industrial Production: ETS(M,A,A)
nbest_ets_model2 <- ets_models2 %>%
select(ETS)
report(nbest_ets_model2)
## Series: Index
## Model: ETS(M,A,A)
## Transformation: log(Index)
## Smoothing parameters:
## alpha = 0.4108343
## beta = 0.003601138
## gamma = 0.4986349
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4]
## 3.867978 0.001720087 0.000374136 0.004661909 0.03279961 0.03542299 0.0162881
## s[-5] s[-6] s[-7] s[-8] s[-9] s[-10]
## -0.01623705 -0.009481794 -0.03783974 -0.01595397 -0.01041062 -0.004165567
## s[-11]
## 0.004542006
##
## sigma^2: 0
##
## AIC AICc BIC
## -2260.207 -2259.484 -2179.260
nbest_ets_model2 %>%
gg_tsresiduals(lag_max = 24)+ggtitle("Residual analysis of Industrial production Non Durable: ETS model")
#Doing Ljung box test on residuals
Box.test(augment(nbest_ets_model2)$.resid, lag=36, fitdf = 6, type = "Lj")
##
## Box-Ljung test
##
## data: augment(nbest_ets_model2)$.resid
## X-squared = 472.18, df = 30, p-value < 2.2e-16
Observations
Selecting best model for Industrial Production: Non-Durable goods(ARIMA or ETS)
# Compare training accuracy with 4-year test-set accuracy for the best
# ARIMA and ETS models; drop columns not used in the comparison.
bind_rows(
nbest_arima_model2 %>% accuracy(),
nbest_ets_model2 %>% accuracy(),
nbest_arima_model2 %>% forecast(h = "4 years") %>% accuracy(nip_test),
nbest_ets_model2%>% forecast(h = "4 years") %>% accuracy(nip_test)
) %>%
select(-ME, -MPE, -ACF1)
## # A tibble: 4 x 7
## .model .type RMSE MAE MAPE MASE RMSSE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 auto_arima1 Training 0.788 0.585 0.593 0.344 0.378
## 2 ETS Training 0.903 0.673 0.689 0.395 0.433
## 3 auto_arima1 Test 2.41 1.77 1.45 NaN NaN
## 4 ETS Test 2.27 1.51 1.25 NaN NaN
So, since RMSE of ETS(M,A,A) comes out to be lowest on test data. Hence, ETS(M,A,A) is a better model. We will forecast the industrial production of non durable goods for 1 year ahead by using ETS(M,A,A) model
4.3.Producer Price Index by Commodity: Non-Durable Goods
Looking at time series
#plotting graphs to see our time series
g1<- PPI_ndurable_goods %>%
autoplot(Index) + ggtitle("Producer Price Index: Non- Durable Goods") + ylab("Index")
g2<- PPI_ndurable_goods %>%
gg_season(Index) + ggtitle("Producer Price Index: Non- Durable Goods") + ylab("Index")
grid.arrange(g1, g2, nrow = 2)
#Checking for different components in the series
PPI_ndurable_goods %>%
model(
STL(Index)) %>%
components() %>%
autoplot()+
labs(title = "STL decomposition of PPI: Non Durable goods")
Forecast: PPI-Non Durable goods
#transform the data and stabilize as there is variation in the series which is increasing with time
PPI_ndurable_goods %>%
autoplot(log(Index)) +
labs(y = "", title = "Log Transformed PPI By Commodity:Non Durable Goods")
#Step 2: Checking for stationary
PPI_ndurable_goods %>% ACF(Index) %>% autoplot()+ggtitle("ACF plot for PPI: Non Durable goods")
#Running KPSS to statistically justify that data is stationary or not and find order of the differincing - Null hypothesis (Ho): Data is stationary - Alternate hypothesis (Halt): Data is non-stationary
PPI_ndurable_goods %>% features(log(Index), unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 12.8 0.01
#no. of seasonal difference - PPI
PPI_ndurable_goods %>% mutate(t_Index = log(Index)) %>%
features(t_Index, unitroot_nsdiffs)
## # A tibble: 1 x 1
## nsdiffs
## <int>
## 1 0
#no. of regular difference - PPI
PPI_ndurable_goods %>% features(log(Index), unitroot_ndiffs)
## # A tibble: 1 x 1
## ndiffs
## <int>
## 1 1
PPI_ndurable_goods %>%
autoplot(log(Index)%>% difference())+
ggtitle("PPI: Non Durable Goods(Differenced timeseries)")+ylab("Index")
#KPSS on box-cox transformed differenced time series
PPI_ndurable_goods %>% mutate(d_Index = (difference(log(Index)))) %>% features(d_Index, unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.218 0.1
#checking residuals of differenced time series
PPI_ndurable_goods %>%
gg_tsdisplay(log(Index) %>% difference() , plot_type="partial")+
ggtitle("Residuals of differenced PPI: Non Durable Goods")
ACF
PACF
The lags can be seen to be decaying exponentially. There is a significant negative partial autocorrelation at lag 1, which suggests an AR(1) non-seasonal component.
Suggested Model
# Hold out Jan 2019 onward as test data; train through Dec 2018.
nppi_test <- PPI_ndurable_goods %>%
filter_index("2019 Jan" ~.)
nppi_train <- PPI_ndurable_goods %>%
filter_index(~ "2018 Dec")
# Candidate non-seasonal ARIMA specs plus an exhaustive automatic search.
arima_models3 <- nppi_train %>%
model(
arima011000 = ARIMA(log(Index) ~ pdq(0,1,1) + PDQ(0,0,0)),
arima110000 = ARIMA(log(Index) ~ pdq(1,1,0) + PDQ(0,0,0)),
arima_model = ARIMA(log(Index), stepwise = FALSE, approx = FALSE)
)
# Rank candidates by AICc.
glance(arima_models3) %>% arrange(AICc)
## # A tibble: 3 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima_model 0.0000944 2768. -5522. -5521. -5488. <cpl [2]> <cpl [25]>
## 2 arima110000 0.0000978 2751. -5496. -5495. -5481. <cpl [1]> <cpl [0]>
## 3 arima011000 0.0000994 2744. -5482. -5482. -5467. <cpl [0]> <cpl [1]>
nbest_arima_model3 <- arima_models3 %>% select(arima_model)
report(nbest_arima_model3)
## Series: Index
## Model: ARIMA(2,1,1)(0,0,2)[12] w/ drift
## Transformation: log(Index)
##
## Coefficients:
## ar1 ar2 ma1 sma1 sma2 constant
## 1.2526 -0.2705 -0.9344 -0.0961 -0.1525 0
## s.e. 0.0472 0.0400 0.0301 0.0355 0.0352 0
##
## sigma^2 estimated as 9.435e-05: log likelihood=2767.79
## AIC=-5521.57 AICc=-5521.44 BIC=-5488.28
Best ARIMA model-PPI: ARIMA(2,1,1)(0,0,2)[12] w/ drift
# Residual diagnostics for the selected non-durable PPI ARIMA model.
nbest_arima_model3 %>%
gg_tsresiduals()+ ggtitle("Residual analysis of PPI Arima model")
# Ljung-Box test on the residuals.
# NOTE(review): ARIMA(2,1,1)(0,0,2)[12] w/ drift reports 6 coefficients but
# fitdf = 5 subtracts only 5 — confirm whether the constant (estimated as 0
# with s.e. 0) was intentionally excluded from fitdf.
Box.test(augment(nbest_arima_model3)$.resid, lag=36, fitdf = 5, type = "Lj")
##
## Box-Ljung test
##
## data: augment(nbest_arima_model3)$.resid
## X-squared = 92.337, df = 31, p-value = 5.193e-08
Observations
#Hybrid benchmark: STL-decompose log(Index), model the seasonally adjusted
#component with ETS(A,Ad,N); the seasonal component defaults to SNAIVE.
st <- decomposition_model(
STL(log(Index)),
ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))
)
#Fit a seasonal-naive baseline, four fixed ETS specifications, an
#automatically selected ETS, and the STL+ETS hybrid; compare training accuracy.
ets_models3 <- nppi_train %>%
model(
snaive = SNAIVE(log(Index)),
AAdN = ETS(log(Index) ~ error("A") + trend("Ad") + season("N")),
AAN = ETS(log(Index) ~ error("A") + trend("A") + season("N")),
MAN = ETS(log(Index) ~ error("M") + trend("A") + season("N")),
MAdN = ETS(log(Index) ~ error("M") + trend("Ad") + season("N")),
ETS = ETS(log(Index)),
stl_ets = st
)
accuracy(ets_models3)
## # A tibble: 7 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 snaive Training 3.11 8.25 5.24 3.03 4.59 1 1 0.944
## 2 AAdN Training 0.112 1.69 0.853 0.129 0.659 0.163 0.204 0.0871
## 3 AAN Training -0.0263 1.75 0.850 -0.00995 0.654 0.162 0.212 0.189
## 4 MAN Training -0.0265 1.76 0.854 -0.00982 0.654 0.163 0.213 0.161
## 5 MAdN Training 0.0853 1.69 0.853 0.0984 0.650 0.163 0.205 0.0142
## 6 ETS Training 0.0853 1.69 0.853 0.0984 0.650 0.163 0.205 0.0142
## 7 stl_ets Training 0.0861 1.63 0.837 0.632 0.160 0.197 0.0208
Best ETS Model- PPI Non Durable goods:stl_ets
#Keep the STL+ETS hybrid (best training RMSE/MAE above) and print its report.
nbest_ets_model3 <- ets_models3 %>%
select(stl_ets)
report(nbest_ets_model3)
## Series: Index
## Model: STL decomposition model
## Transformation: log(Index)
## Combination: season_adjust + season_year
##
## ========================================
##
## Series: season_adjust
## Model: ETS(A,Ad,N)
## Smoothing parameters:
## alpha = 0.9998999
## beta = 0.3871663
## phi = 0.8000002
##
## Initial states:
## l[0] b[0]
## 3.180833 -0.006346274
##
## sigma^2: 1e-04
##
## AIC AICc BIC
## -2206.681 -2206.583 -2178.133
##
## Series: season_year
## Model: SNAIVE
##
## sigma^2: 0
#Residual diagnostics for the STL+ETS model of PPI Non-Durable goods.
#(Fixed: the plot title previously said "PPI Durable" although this section
#models the Non-Durable series.)
nbest_ets_model3 %>%
gg_tsresiduals(lag_max = 24)+ggtitle("Residual analysis of PPI Non-Durable: ETS model")
#Ljung-Box test on residuals; the tiny p-value indicates the residuals
#are still autocorrelated.
Box.test(augment(nbest_ets_model3)$.resid, lag=36, fitdf = 5, type = "Lj")
##
## Box-Ljung test
##
## data: augment(nbest_ets_model3)$.resid
## X-squared = 268.58, df = 31, p-value < 2.2e-16
Observations
Selecting best model for PPI: Non-Durable goods (ARIMA or ETS)
# Compare ARIMA vs STL+ETS: training accuracy plus test-set accuracy of
# 4-year-ahead forecasts over the held-out nppi_test window.
# (MASE/RMSSE are NaN on the test rows because the scaling set is the test data.)
bind_rows(
nbest_arima_model3 %>% accuracy(),
nbest_ets_model3 %>% accuracy(),
nbest_arima_model3 %>% forecast(h = "4 years") %>% accuracy(nppi_test),
nbest_ets_model3 %>% forecast(h = "4 years") %>% accuracy(nppi_test)
) %>%
select(-ME, -MPE, -ACF1)
## # A tibble: 4 x 7
## .model .type RMSE MAE MAPE MASE RMSSE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima_model Training 1.63 0.813 0.628 0.155 0.198
## 2 stl_ets Training 1.63 0.837 0.632 0.160 0.197
## 3 arima_model Test 20.3 14.8 5.59 NaN NaN
## 4 stl_ets Test 30.1 21.4 7.91 NaN NaN
Clearly, the ARIMA model performed better than ETS on the test data here, as the RMSE of the ARIMA model is lower. So we will use the ARIMA model to forecast PPI.
4.4.Consumer Price Index for all Urban Consumers: Non- Durable Goods
Looking at time series
#Visual inspection of CPI Non-Durable goods: level plot and seasonal plot
#stacked, followed by an STL decomposition into trend/season/remainder.
g1 <- autoplot(CPI_ndurable_goods, Index) +
ggtitle("Consumer Price Index: Non-Durable Goods")
g2 <- gg_season(CPI_ndurable_goods, Index) +
ggtitle("Consumer Price Index: Non-Durable Goods")
grid.arrange(g1, g2, nrow = 2)
#STL decomposition to see the components of the series
cpi_stl <- CPI_ndurable_goods %>% model(STL(Index))
components(cpi_stl) %>%
autoplot() +
labs(title = "STL decomposition of CPI: Non-Durable goods")
Forecast: CPI-non durable goods
#Log-transform to stabilize the variance, which grows with the level of the series.
CPI_ndurable_goods %>%
autoplot(log(Index)) +
labs(y = "", title = "Log Transformed Consumer Price Index for Non Durable Goods")
#Step 2: Checking for stationarity via the ACF of the raw series.
CPI_ndurable_goods %>% ACF(Index) %>% autoplot()+ggtitle("ACF plot for CPI: Non Durable goods")
#KPSS test to judge stationarity and the order of differencing.
#Null hypothesis (Ho): data is stationary; alternate (Ha): non-stationary.
#kpss_stat = 11.2 with p-value 0.01 (lower table bound) -> reject Ho: differencing needed.
CPI_ndurable_goods %>% features(log(Index), unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 11.2 0.01
#Number of seasonal differences suggested for log CPI (0 -> no seasonal differencing).
CPI_ndurable_goods %>% mutate(t_Index = log(Index)) %>%
features(t_Index, unitroot_nsdiffs)
## # A tibble: 1 x 1
## nsdiffs
## <int>
## 1 0
#Number of regular differences suggested for log CPI (2 -> difference twice).
CPI_ndurable_goods %>% features(log(Index), unitroot_ndiffs)
## # A tibble: 1 x 1
## ndiffs
## <int>
## 1 2
#Plot the twice-differenced log series.
#(Fixed: the title previously said "CPI: Durable Goods" although this
#section analyses the Non-Durable series.)
CPI_ndurable_goods %>%
autoplot(log(Index)%>% difference() %>% difference())+
ggtitle("CPI: Non-Durable Goods(Differenced timeseries)")+ylab("Index")
#KPSS on the twice-differenced log series: p-value 0.1 (upper table bound)
#-> fail to reject stationarity; two regular differences suffice.
CPI_ndurable_goods %>% mutate(d_Index = difference(difference(log(Index)))) %>% features(d_Index, unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.00796 0.1
#ACF/PACF of the twice-differenced log series to suggest ARMA orders.
CPI_ndurable_goods %>%
gg_tsdisplay(log(Index) %>% difference() %>% difference(), plot_type="partial")+
ggtitle("Residuals of differenced CPI: Non Durable Goods")
ACF
PACF
The lags can be seen to be decaying exponentially.
Suggested Model: ARIMA(0,2,1) or ARIMA(1,2,1) or ARIMA(0,2,2)
#Train/test split for CPI: hold out Jan 2019 onward.
cpi_ntest <- CPI_ndurable_goods %>%
filter_index("2019 Jan" ~.)
cpi_ntrain <- CPI_ndurable_goods %>%
filter_index(~ "2018 Dec")
#Four hand-picked d = 2 candidates plus an exhaustive automatic search; rank by AICc.
arima_all_models11 <- cpi_ntrain %>%
model(
arima021 = ARIMA(log(Index) ~ pdq(0,2,1) + PDQ(0,0,0)),
arima121 = ARIMA(log(Index) ~ pdq(1,2,1) + PDQ(0,0,0)),
arima120 = ARIMA(log(Index) ~ pdq(1,2,0) + PDQ(0,0,0)),
arima022 = ARIMA(log(Index) ~ pdq(0,2,2) + PDQ(0,0,0)),
arima_model = ARIMA(log(Index), stepwise = FALSE, approx = FALSE)
)
glance(arima_all_models11) %>% arrange(AICc)
## # A tibble: 5 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima_model 0.0000304 2853. -5692. -5692. -5660. <cpl [0]> <cpl [28]>
## 2 arima022 0.0000315 2838. -5671. -5671. -5657. <cpl [0]> <cpl [2]>
## 3 arima121 0.0000318 2835. -5663. -5663. -5650. <cpl [1]> <cpl [1]>
## 4 arima021 0.0000361 2786. -5569. -5569. -5559. <cpl [0]> <cpl [1]>
## 5 arima120 0.0000429 2723. -5441. -5441. -5432. <cpl [1]> <cpl [0]>
#Keep the automatic-search winner (lowest AICc) and print its report.
best_arima_model11 <- arima_all_models11 %>% select(arima_model)
report(best_arima_model11)
## Series: Index
## Model: ARIMA(0,2,4)(0,0,2)[12]
## Transformation: log(Index)
##
## Coefficients:
## ma1 ma2 ma3 ma4 sma1 sma2
## -0.5539 -0.3340 -0.1303 0.0816 -0.1313 -0.1093
## s.e. 0.0365 0.0414 0.0441 0.0382 0.0369 0.0345
##
## sigma^2 estimated as 3.042e-05: log likelihood=2853.07
## AIC=-5692.15 AICc=-5692 BIC=-5659.77
Best ARIMA model-CPI: ARIMA(0,2,4)(0,0,2)[12]
#Residual diagnostics for the chosen CPI ARIMA model.
best_arima_model11 %>%
gg_tsresiduals()+ ggtitle("Residual analysis of CPI Arima model")
#Ljung-Box test; fitdf = 6 subtracts the six estimated coefficients
#(df = 30). Small p-value -> residual autocorrelation remains.
Box.test(augment(best_arima_model11)$.resid, lag=36, fitdf = 6, type = "Lj")
##
## Box-Ljung test
##
## data: augment(best_arima_model11)$.resid
## X-squared = 82.691, df = 30, p-value = 8.049e-07
Observations
Using ETS model
#STL + ETS(A,Ad,N) hybrid for the seasonally adjusted component
#(seasonal component is handled by the decomposition_model default, SNAIVE).
stl2 <- decomposition_model(
STL(log(Index)),
ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))
)
#Fit four fixed ETS specifications, the hybrid, and an automatic ETS; compare.
ets_all_models11 <- cpi_ntrain %>%
model(AAdN = ETS(log(Index) ~ error("A") + trend("Ad") + season("N")),
AAN = ETS(log(Index) ~ error("A") + trend("A") + season("N")),
MAN = ETS(log(Index) ~ error("M") + trend("A") + season("N")),
MAdN = ETS(log(Index) ~ error("M") + trend("Ad") + season("N")),
stl_ets = stl2,
ETS = ETS(log(Index))
)
accuracy(ets_all_models11)
## # A tibble: 6 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAdN Training 0.0835 1.01 0.528 0.0895 0.386 0.132 0.185 0.125
## 2 AAN Training -0.0147 1.04 0.534 -0.00511 0.387 0.133 0.192 0.346
## 3 MAN Training -0.0138 1.05 0.535 -0.00496 0.386 0.133 0.193 0.337
## 4 MAdN Training 0.0620 1.03 0.531 0.0681 0.385 0.133 0.189 0.213
## 5 stl_ets Training 0.0786 0.970 0.525 0.0824 0.377 0.131 0.179 0.138
## 6 ETS Training 0.0620 1.03 0.531 0.0681 0.385 0.133 0.189 0.213
Best ETS Model-CPI: Non-Durable goods: ETS(A,Ad,N) and STL decomposition with ETS(A,Ad,N)
#Two ETS finalists: the plain ETS(A,Ad,N) and the STL+ETS hybrid.
best_ets_model11_1 <- ets_all_models11 %>%
select(AAdN)
best_ets_model11_2 <- ets_all_models11 %>%
select(stl_ets)
#Residual diagnostics and Ljung-Box test for ETS(A,Ad,N).
best_ets_model11_1 %>%
gg_tsresiduals(lag_max = 24)+ggtitle("Residual analysis of CPI Non-Durable: ETS model")
#Doing Ljung box test on residuals
Box.test(augment(best_ets_model11_1)$.resid, lag=36, fitdf = 5, type = "Lj")
##
## Box-Ljung test
##
## data: augment(best_ets_model11_1)$.resid
## X-squared = 266.3, df = 31, p-value < 2.2e-16
#Residual diagnostics and Ljung-Box test for the STL+ETS hybrid.
best_ets_model11_2 %>%
gg_tsresiduals(lag_max = 24)+ggtitle("Residual analysis of CPI Non-Durable: ETS model")
#Doing Ljung box test on residuals
Box.test(augment(best_ets_model11_2)$.resid, lag=36, fitdf = 5, type = "Lj")
##
## Box-Ljung test
##
## data: augment(best_ets_model11_2)$.resid
## X-squared = 381.64, df = 31, p-value < 2.2e-16
Observations
Plotting both forecasts
#Five-year-ahead forecasts from the three finalists, each plotted over the
#training data with the held-out test data overlaid in red.
fc15 <- best_arima_model11 %>% forecast(h = "5 years")
fc16_1 <- best_ets_model11_1 %>% forecast(h = "5 years")
fc16_2 <- best_ets_model11_2 %>% forecast(h = "5 years")
c1<- fc15 %>%
autoplot(cpi_ntrain ) +
geom_line(data=cpi_ntest, aes(x=Month, y=Index), col='red')+
ggtitle("CPI: Non-Durable Goods: ARIMA")+ylab("Index")
c2<- fc16_1 %>%
autoplot(cpi_ntrain) +
geom_line(data=cpi_ntest, aes(x=Month, y=Index), col='red')+
ggtitle("CPI: Non-Durable Goods: ETS")+ylab("Index")
c3<- fc16_2 %>%
autoplot(cpi_ntrain) +
geom_line(data=cpi_ntest, aes(x=Month, y=Index), col='red')+
ggtitle("CPI: Non-Durable Goods: ETS")+ylab("Index")
grid.arrange(c1,c2,c3, nrow =3)
# Compare the three finalists on training accuracy and 3-year-ahead
# test-set accuracy (MASE/RMSSE are NaN on the test rows because the
# scaling set is the test data).
bind_rows(
best_arima_model11 %>% accuracy(),
best_ets_model11_1 %>% accuracy(),
best_ets_model11_2 %>% accuracy(),
best_arima_model11 %>% forecast(h = "3 years") %>% accuracy(cpi_ntest),
best_ets_model11_1 %>% forecast(h = "3 years") %>% accuracy(cpi_ntest),
best_ets_model11_2 %>% forecast(h = "3 years") %>% accuracy(cpi_ntest)
) %>%
select(-ME, -MPE, -ACF1)
## # A tibble: 6 x 7
## .model .type RMSE MAE MAPE MASE RMSSE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima_model Training 0.928 0.491 0.363 0.123 0.171
## 2 AAdN Training 1.01 0.528 0.386 0.132 0.185
## 3 stl_ets Training 0.970 0.525 0.377 0.131 0.179
## 4 arima_model Test 9.54 6.78 2.84 NaN NaN
## 5 AAdN Test 13.9 11.3 4.78 NaN NaN
## 6 stl_ets Test 12.5 9.63 4.05 NaN NaN
Clearly, the ARIMA model performed better than ETS on the test data here, as the RMSE of the ARIMA model is lower. So we will use the ARIMA model to forecast CPI.
5.1.Real Personal Consumption Expenditure
Looking at the series
#Quarterly RPCE on services (share of GDP): level plot, then print the tsibble.
data_rpce_services_gdp %>% autoplot(.vars=Expenditure) + labs(title = "Real Personal Consumption Expenditure:Services") +
theme(plot.title = element_text(hjust = 0.5))
data_rpce_services_gdp
#Number of seasonal differences suggested for the logged series (1 -> one seasonal difference).
data_rpce_services_gdp %>% mutate(t_Expenditure = log(Expenditure)) %>%
features(t_Expenditure, unitroot_nsdiffs)
## # A tibble: 1 x 1
## nsdiffs
## <int>
## 1 1
#Number of regular differences needed after seasonal (lag 4) differencing (0 -> none).
data_rpce_services_gdp %>% mutate(d_Expenditure = difference(log(Expenditure),4)) %>% features(d_Expenditure, unitroot_ndiffs)
## # A tibble: 1 x 1
## ndiffs
## <int>
## 1 0
#Plot the seasonally (lag 4) differenced series.
data_rpce_services_gdp %>%
autoplot((Expenditure)%>% difference(lag=4))+
ggtitle("Real personal consumption Expenditure:Services(Differenced timeseries)")+ylab("% of GDP")
#KPSS on the seasonally differenced series: p-value 0.1 (upper table bound)
#-> fail to reject stationarity; one seasonal difference suffices.
data_rpce_services_gdp %>% mutate(t_Expenditure = difference((Expenditure),4)) %>%
features(t_Expenditure, unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.179 0.1
ACF plot
PACF plot
#Two fixed candidates plus stepwise and exhaustive automatic ARIMA searches;
#report() on a multi-model mable prints a glance-style comparison table.
data_rpce_services_gdp_fit <- data_rpce_services_gdp %>%
model(arima110 = ARIMA(Expenditure ~ pdq(1,1,0)),
arima010 = ARIMA(Expenditure ~ pdq(0,1,0)),
stepwise = ARIMA(Expenditure),
auto = ARIMA(Expenditure, stepwise=FALSE))
report (data_rpce_services_gdp_fit)
## # A tibble: 4 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima110 0.165 -40.0 86.1 86.4 93.1 <cpl [1]> <cpl [4]>
## 2 arima010 0.163 -40.0 84.1 84.3 88.8 <cpl [0]> <cpl [4]>
## 3 stepwise 0.157 -38.5 83.0 83.4 90.1 <cpl [1]> <cpl [4]>
## 4 auto 0.157 -38.5 83.0 83.4 90.1 <cpl [1]> <cpl [4]>
#Refit the automatically selected ARIMA and print its report.
#(Fixed: the original piped into report(fit) where `fit` was an undefined
#name — it only avoided an error because R never evaluated the unused
#argument. report() needs no extra argument here.)
rcpe_services_fit <- data_rpce_services_gdp %>%
model(ARIMA((Expenditure))) %>%
report()
## Series: Expenditure
## Model: ARIMA(1,0,0)(0,1,1)[4]
## Transformation: (Expenditure)
##
## Coefficients:
## ar1 sma1
## 0.8994 -0.6995
## s.e. 0.0552 0.1179
##
## sigma^2 estimated as 0.1571: log likelihood=-38.51
## AIC=83.03 AICc=83.36 BIC=90.06
Best model - ARIMA(1,0,0)(0,1,1)[4]
#Residual diagnostics; Ljung-Box with fitdf = 2 (ar1 and sma1 coefficients).
#Large p-value (0.84) -> residuals look like white noise.
rcpe_services_fit %>% gg_tsresiduals()
#Doing Ljung box test on residuals
Box.test(augment(rcpe_services_fit)$.resid, lag=36, fitdf = 2, type = "Lj")
##
## Box-Ljung test
##
## data: augment(rcpe_services_fit)$.resid
## X-squared = 25.831, df = 34, p-value = 0.8415
#Fit the automatically selected ARIMA and plot a one-year-ahead forecast
#over the observed series (level = NULL suppresses prediction intervals).
final_arima <- model(data_rpce_services_gdp, ARIMA((Expenditure)))
fc_services <- forecast(final_arima, h = "1 years")
autoplot(fc_services, data_rpce_services_gdp, level = NULL) +
labs(title = "") +
theme(plot.title = element_text(hjust = 0.5))
Observations
5.2.Real Net Exports of Goods and Services
#Net exports of goods and services (share of GDP): level plot.
data_netexp_gdp %>% autoplot(.vars=Expenditure) + labs(title = "Net Exports of Goods and Services") +
theme(plot.title = element_text(hjust = 0.5))
#Number of seasonal differences suggested (1 -> one seasonal difference).
data_netexp_gdp %>% mutate(t_Expenditure = (Expenditure)) %>%
features(t_Expenditure, unitroot_nsdiffs)
## # A tibble: 1 x 1
## nsdiffs
## <int>
## 1 1
#Number of regular differences needed after seasonal (lag 4) differencing (1 -> one more).
data_netexp_gdp %>% mutate(d_Expenditure = difference(Expenditure,4)) %>% features(d_Expenditure, unitroot_ndiffs)
## # A tibble: 1 x 1
## ndiffs
## <int>
## 1 1
#Plot the seasonally-then-regularly differenced series.
data_netexp_gdp %>%
autoplot(Expenditure%>% difference(lag=4)%>% difference())+
ggtitle("Net Exports as % of GDP: Services(Differenced timeseries)")+ylab("% of GDP")
#KPSS on the doubly differenced series: p-value 0.1 (upper table bound)
#-> fail to reject stationarity.
data_netexp_gdp %>% mutate(t_Expenditure = difference(difference(Expenditure,4)))%>%
features(t_Expenditure, unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.107 0.1
#ACF/PACF of the differenced series to suggest candidate ARMA orders.
data_netexp_gdp %>%
gg_tsdisplay(Expenditure %>% difference(lag=4)%>%difference(), plot_type="partial")+
ggtitle("Residuals of differenced Net Exports: Services")
#Two fixed candidates plus stepwise and exhaustive automatic searches;
#the exhaustive search ("auto") achieves the lowest AIC below.
data_netexp_gdp_fit <- data_netexp_gdp %>%
model(arima010 = ARIMA(Expenditure ~ pdq(0,1,0)),
arima012 = ARIMA(Expenditure ~ pdq(0,1,2)),
stepwise = ARIMA(Expenditure),
auto = ARIMA(Expenditure, stepwise=FALSE))
report (data_netexp_gdp_fit)
## # A tibble: 4 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima010 0.0943 -18.8 41.7 41.8 46.3 <cpl [0]> <cpl [4]>
## 2 arima012 0.0944 -17.6 43.3 43.9 52.6 <cpl [0]> <cpl [6]>
## 3 stepwise 0.0943 -18.8 41.7 41.8 46.3 <cpl [0]> <cpl [4]>
## 4 auto 0.0828 -12.3 38.5 40.2 54.8 <cpl [1]> <cpl [5]>
#Keep the exhaustive-search winner and print its report.
#(Fixed: the original piped into report(netexp_gdp_fit), passing a name that
#was not yet bound at evaluation time — harmless only because R never
#evaluated the unused argument. report() takes the piped mable alone.)
netexp_gdp_fit <- data_netexp_gdp_fit%>%select(auto) %>%
report()
## Series: Expenditure
## Model: ARIMA(1,1,5)(0,1,0)[4]
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4 ma5
## -0.3023 0.5188 0.1410 0.1866 -0.5756 -0.5706
## s.e. 0.2872 0.2577 0.1204 0.1362 0.1287 0.1386
##
## sigma^2 estimated as 0.08283: log likelihood=-12.26
## AIC=38.52 AICc=40.16 BIC=54.83
Best ARIMA model - Net Exports: ARIMA(1,1,5)(0,1,0)[4]
#Residual diagnostics; Ljung-Box with fitdf = 6 (six ARMA coefficients).
#Large p-value (0.91) -> residuals look like white noise.
netexp_gdp_fit %>% gg_tsresiduals()
#Doing Ljung box test on residuals
Box.test(augment(netexp_gdp_fit)$.resid, lag=36, fitdf = 6, type = "Lj")
##
## Box-Ljung test
##
## data: augment(netexp_gdp_fit)$.resid
## X-squared = 20.211, df = 30, p-value = 0.9109
Observations
#One-year-ahead forecast of net exports plotted over the observed series
#(level = NULL suppresses prediction intervals).
fc_netexp <- forecast(netexp_gdp_fit, h = "1 year")
autoplot(fc_netexp, data_netexp_gdp, level = NULL) +
labs(title = "") +
theme(plot.title = element_text(hjust = 0.5))
5.3.All Employees, Service-Providing
#Convert the raw employment data to a monthly tsibble keyed on Month.
data_ae_sp_1 <- data_ae_sp %>%mutate(Month = yearmonth(DATE)) %>%
as_tsibble(index=Month)
data_ae_sp_1 %>% autoplot(.vars=SRVPRD) + labs(title = "All Employees, Service-Providing") +
theme(plot.title = element_text(hjust = 0.5))
#Number of seasonal differences suggested (0 -> none; series is seasonally adjusted at source? unverified here).
data_ae_sp_1 %>% mutate(t_SRVPRD = SRVPRD) %>%
features(t_SRVPRD, unitroot_nsdiffs)
## # A tibble: 1 x 1
## nsdiffs
## <int>
## 1 0
#Number of regular differences after a lag-12 difference (0 -> none needed beyond it).
data_ae_sp_1 %>% mutate(t_SRVPRD = difference(SRVPRD,12)) %>% features(t_SRVPRD, unitroot_ndiffs)
## # A tibble: 1 x 1
## ndiffs
## <int>
## 1 0
#ACF/PACF of the raw employment series, then candidate ARIMA fits;
#stepwise and exhaustive searches agree with the fixed (0,1,2) candidate.
data_ae_sp_1 %>%
gg_tsdisplay(SRVPRD, plot_type='partial')
data_ae_sp_fit <- data_ae_sp_1 %>%
model(arima111 = ARIMA(SRVPRD ~ pdq(1,1,1)),
arima012 = ARIMA(SRVPRD ~ pdq(0,1,2)),
stepwise = ARIMA(SRVPRD),
auto = ARIMA(SRVPRD, stepwise=FALSE))
report (data_ae_sp_fit)
## # A tibble: 4 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima111 370517. -7821. 15650. 15650. 15670. <cpl [1]> <cpl [1]>
## 2 arima012 364657. -7813. 15634. 15634. 15654. <cpl [0]> <cpl [2]>
## 3 stepwise 364657. -7813. 15634. 15634. 15654. <cpl [0]> <cpl [2]>
## 4 auto 364657. -7813. 15634. 15634. 15654. <cpl [0]> <cpl [2]>
#Refit the automatically selected ARIMA on the tsibble and print its report.
#(Fixed: the original fit on the raw `data_ae_sp` data frame instead of the
#tsibble `data_ae_sp_1` created above, and piped into report(ae_sp_fit)
#with a not-yet-bound name that survived only via lazy evaluation.)
ae_sp_fit <- data_ae_sp_1 %>%
model(ARIMA(SRVPRD)) %>%
report()
## Series: SRVPRD
## Model: ARIMA(0,1,2) w/ drift
##
## Coefficients:
## ma1 ma2 constant
## 0.0405 -0.1743 111.5975
## s.e. 0.0316 0.0328 16.5297
##
## sigma^2 estimated as 364657: log likelihood=-7813
## AIC=15634 AICc=15634.04 BIC=15653.63
Best Model: ARIMA(0,1,2) w/ drift
#Residual diagnostics, then a one-year-ahead forecast over the observed series.
#(Fixed: the original modelled and plotted against the raw `data_ae_sp`
#data frame; model()/autoplot() expect the tsibble `data_ae_sp_1` built earlier.)
ae_sp_fit %>% gg_tsresiduals()
ae_sp_final_arima <- data_ae_sp_1 %>%
model(ARIMA(SRVPRD))
ae_sp_final_arima %>%
forecast(h = "1 years") %>%
autoplot(data_ae_sp_1, level = NULL) + labs(title = "") +
theme(plot.title = element_text(hjust = 0.5))
* Observations
#Global ggplot2 styling for the remaining figures: black-and-white base
#theme, slightly smaller axis/legend text, and no background grid lines.
theme_set(theme_bw()) # base theme
theme_update(
  axis.title = element_text(size = 11),
  axis.text = element_text(size = 10),
  legend.text = element_text(size = 10),
  legend.title = element_text(size = 10),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank()
)
6.1.DURABLE GOODS
6.1.1.Real Personal Consumption Expenditure: Durable Goods and Real Personal Disposable Income
Real Personal Consumption Expenditure
Real personal consumption expenditures (PCE) is the primary measure of consumer spending on goods and services in the U.S. economy
The chart below shows what percentage of GDP is driven by (inflation-adjusted) personal consumption expenditures on Durable Goods
Consumer spending generally follows the pattern of the business cycle. During economic downturns, consumer spending typically decreases as unemployment increases and personal income decreases. Although, during expansions, consumer spending increases. A strong economy can impact consumer sentiment and spending
Spending on durable goods decrease relatively more in any recession as compared to non-durable goods as people tend to delay their big purchases like car, household appliance, etc. until the economy improves
Although the above pattern has held in previous recessions, expenditures on non-durable goods have experienced a larger decline than spending on durable goods in the current COVID-19 recession
The spending pattern during the 2007-2009 financial crisis followed the traditional pattern and showed a larger decrease in spending on durable. But the current COVID-19 recession shows the opposite and apart from a month drop in expenditure on durables, it rises sharply following a major portion of GDP. It was clear to people that it is going to stay for long. So, as expected people started spending more in durable goods. Lockdown and social distancing shifted consumer demand away from services toward durable goods
Swift action by the Federal Reserve to lower interest rates and the economic impact payments that went to a portion of the population may have helped bolster spending on durable goods
Between 2005 and 2021, there is a clear upward trend except during the financial crisis and recession of 2007-2009 and a short recession caused by Covid-19 pandemic, signifying that consumers are spending a larger percentage of their real income on durable goods. Moreover, our forecast shows a continuation of the upward trend
This series has a strong seasonality in it, with Q4 seems to have a peak in expenditure due to holiday season
Consumer demand for durable goods has increased substantially in the last decade, and shows signs of continuing on this trajectory in the future
RPCE Forecasts
#Best model for durable-goods RPCE from the earlier comparison was ETS(A,A,A)
#on the Box-Cox-transformed series (lambda1 estimated earlier in the report).
rpce_model <- rpce_durable_goods_gdp %>%
model(AAA = ETS(box_cox(Expenditure,lambda1) ~ error("A") + trend("A") + season("A")))
forecast1 <- rpce_model %>% forecast(h = "3 years")
f1<- forecast1 %>% autoplot(rpce_durable_goods_gdp) +
labs(y = "Expenditure(%of GDP)", title = "RPCE: Durable Goods")
#Companion plot: real disposable income with an ETS(A,A,N) 3-year forecast,
#shown from 2001 onward.
rdpi_model<- rdpi %>%
model(AAN = ETS(Income ~ error("A") + trend("A") + season("N")))
fc <- rdpi_model %>% forecast(h = "3 years")
f2<- fc %>% autoplot(rdpi %>% filter(year(Quarter)>2000)) +
labs(y = "Income", title = "Real Personal Disposable Income")
grid.arrange(f1,f2,nrow=2)
Real disposable income
Consumer Spending and Monetary and Fiscal Policy
6.1.2.Industrial Production, PPI, and CPI: Durable Goods
Industrial Production: Durable goods
#Month-over-month and year-over-year percentage changes of durable-goods
#industrial production, plotted as two stacked line charts.
monthly_report_IP1 <- industrial_prod_durable_goods %>%
mutate(MoM = (Index - lag(Index)) / lag(Index))
yearly_report_IP1 <- industrial_prod_durable_goods %>%
mutate(YoY = (Index - lag(Index, 12)) / lag(Index, 12))
i1<- ggplot(yearly_report_IP1, aes(x=Month, y=YoY)) +
geom_line(position="dodge") +
scale_y_continuous(labels = scales::percent) + labs(y = "% change") + ggtitle("Industrial production: Durable goods(% change from year ago)")
i2<- ggplot(monthly_report_IP1, aes(x=Month, y=MoM)) +
geom_line(position="dodge") +
scale_y_continuous(labels = scales::percent) + labs(y = "% change") + ggtitle("Industrial production: Durable goods(%change from previous month)")
grid.arrange(i1, i2, nrow =2)
Industrial production: Durable goods Forecasts
This prediction will be helpful….
#Forecast industrial production with ETS(M,Ad,M) on log(Index).
#NOTE(review): the original comment claimed the auto-ARIMA was best, but the
#code fits an ETS model — confirm which comparison this came from.
ip_model <- industrial_prod_durable_goods %>%
model(MAdM = ETS(log(Index) ~ error("M") + trend("Ad") + season("M")))
forecast2 <- ip_model %>% forecast(h = "15 months")
#Append the point forecasts (.mean) to the history so growth rates can be
#computed across the actual/forecast boundary.
fc_ip_durable <- forecast2 %>% as_tsibble(index = Month) %>% mutate(Index = .mean) %>% select(Month,Index)
ip_durable_fc <- bind_rows(industrial_prod_durable_goods,fc_ip_durable)
monthly_report_IP <- ip_durable_fc %>%
mutate(MoM = (Index - lag(Index)) / lag(Index))%>% select(Month, MoM)
yearly_report_IP <- ip_durable_fc %>%
mutate(YoY = (Index - lag(Index, 12))/ lag(Index, 12))%>% select(Month, YoY)
#History in black/blue, forecast horizon (Apr 2022 onward) in red.
i3<- ggplot(NULL, aes(Month, YoY)) +
geom_line(data = yearly_report_IP %>% filter_index("2015 Jan" ~ "2022 Mar"))+
geom_line(data = yearly_report_IP %>% filter_index("2022 Apr" ~ .), color = "red")+
scale_y_continuous(labels = scales::percent) + labs(y = "% change") + ggtitle("Industrial production: Durable goods(%change from previous year)")
i4<- ggplot(NULL, aes(Month, MoM)) +
geom_col(data = monthly_report_IP %>% filter_index("2020 Jan" ~ "2022 Mar"), fill ="blue")+
geom_col(data = monthly_report_IP %>% filter_index("2022 Apr" ~ .), fill = "red")+
scale_y_continuous(labels = scales::percent) + labs(y = "% change") + ggtitle("Industrial production: Durable goods(%change from previous month)")
grid.arrange(i3, i4, nrow = 2)
Producer Price Index: Durable Goods
#MoM and YoY growth of durable-goods PPI, then overlay PPI against
#industrial production: YoY growth on top, raw index levels below.
monthly_report_PPI1 <- PPI_durable_goods %>%
mutate(MoM = (Index - lag(Index)) / lag(Index))
yearly_report_PPI1 <- PPI_durable_goods %>%
mutate(YoY = (Index - lag(Index, 12)) / lag(Index, 12))
colors <- c("PPI:Durable goods" = "blue", "Industrial Production" = "red")
a1<- ggplot(NULL, aes(Month,YoY)) +
geom_line(data = yearly_report_IP1, aes(color = "Industrial Production"))+
geom_line(data = yearly_report_PPI1, aes(color = "PPI:Durable goods"))+
scale_y_continuous(labels = scales::percent) +
labs(x = "Month", y = "log(Index)" ,color = "PPI/Industrial Production")+
ggtitle("PPI by commodity(Final Demand) and Industrial production: Durable Goods")+
labs(y = "% change YoY", color="Legend text")
a2<- ggplot(NULL, aes(Month,Index)) +
geom_line(data = industrial_prod_durable_goods, aes(col = "Industrial Production"))+
geom_line(data = PPI_durable_goods, aes(col = "PPI:Durable goods"))+
labs(x = "Month", y = "Index 1982=100" ,color = "PPI/Industrial Production")+
ggtitle("PPI by commodity(Final Demand) and Industrial production : Durable Goods")+
scale_color_manual(values = colors)
grid.arrange(a1,a2,nrow =2)
PPI and Forecasts
This prediction will be helpful…
#Best model for durable-goods PPI was the exhaustive auto-ARIMA on log(Index).
ppi_model <- PPI_durable_goods %>%
model(arima_model = ARIMA(log(Index), stepwise = FALSE, approx = FALSE))
forecast3 <- ppi_model %>% forecast(h = "2 years")
#Append point forecasts to the history to compute growth rates across the boundary.
fc_ppi_durable <- forecast3 %>% as_tsibble(index = Month) %>% mutate(Index = .mean) %>% select(Month,Index)
ppi_durable_fc <- bind_rows(PPI_durable_goods,fc_ppi_durable)
monthly_report_PPI <- ppi_durable_fc %>%
mutate(MoM = (Index - lag(Index)) / lag(Index))%>% select(Month, MoM)
yearly_report_PPI <- ppi_durable_fc %>%
mutate(YoY = (Index - lag(Index, 12)) / lag(Index, 12))%>% select(Month, YoY)
#History in blue, forecast horizon (Apr 2022 onward) in red.
p3<- ggplot(NULL, aes(Month, YoY)) +
geom_col(data = yearly_report_PPI %>% filter_index("2019 Jan" ~ "2022 Mar"), fill ="blue")+
geom_col(data = yearly_report_PPI %>% filter_index("2022 Apr" ~ .), fill = "red")+
scale_y_continuous(labels = scales::percent) + labs(y = "% change") + ggtitle("PPI: Durable goods(%change from previous year)")
p4<- ggplot(NULL, aes(Month, MoM)) +
geom_col(data = monthly_report_PPI %>% filter_index("2020 Jan" ~ "2022 Mar"), fill ="blue")+
geom_col(data = monthly_report_PPI %>% filter_index("2022 Apr" ~ .), fill = "red")+
scale_y_continuous(labels = scales::percent) + labs(y = "% change") + ggtitle("PPI: Durable goods(%change from previous month)")
grid.arrange(p3, p4, nrow = 2)
Consumer Price Index: Durable goods
CPI is a measure of the total value of goods and services consumers have bought over a specified period
PPI serves as a leading indicator in CPI. When producers are faced with input inflation, those rising costs are passed along to the retailers and eventually to the consumer
In first graph below we can see that 12-month percent change for the two series is in tandem, PPI leading CPI with few periods, which justify what we said above that PPI serves as leading indicator in CPI
Generally, tracking PPI allows one to determine the cause of the changes in CPI. Say, if CPI and PPI are in tandem then that tells that retailers are attempting to maintain their operating margins. But if CPI increases at a faster rate than PPI, that will indicate factors other than inflation that are causing retailers to increase price, like we see during Covid
Overlaying the CPI and PPI for Durable Goods below, we can see that the two trendlines track one another closely until the early 2000’s, when they start to diverge with PPI increasing at higher rates than CPI
#MoM and YoY growth of durable-goods CPI, then overlay CPI against PPI:
#YoY growth on top, raw index levels below.
monthly_report_CPI1 <- CPI_durable_goods %>%
mutate(MoM = (Index - lag(Index)) / lag(Index))
yearly_report_CPI1 <- CPI_durable_goods %>%
mutate(YoY = (Index - lag(Index, 12)) / lag(Index, 12))
colors <- c("PPI:Durable goods" = "blue", "CPI:Durable" = "red")
a1<- ggplot(NULL, aes(Month,YoY)) +
geom_line(data = yearly_report_CPI1, aes(color = "CPI:Durable"))+
geom_line(data = yearly_report_PPI1, aes(color = "PPI:Durable goods"))+
scale_y_continuous(labels = scales::percent) +
labs(x = "Month", y = "log(Index)" ,color = "PPI/CPI:Durable")+
ggtitle("PPI by commodity(Final Demand) and CPI: Durable Goods")+
labs(y = "% change YoY", color="PPI/CPI")
#plotting PPI and CPI index levels on the same axes
colors <- c("PPI:Durable goods" = "blue", "CPI:Durable" = "red")
a2<- ggplot(NULL, aes(Month,Index)) +
geom_line(data = CPI_durable_goods, aes(col = "CPI:Durable"))+
geom_line(data = PPI_durable_goods, aes(col = "PPI:Durable goods"))+
labs(x = "Month", y = "Index 1982=100" ,color = "PPI/CPI")+
ggtitle("PPI by commodity(Final Demand) and CPI : Durable Goods")+
scale_color_manual(values = colors)
grid.arrange(a1,a2,nrow=2)
CPI and forecasts
As for PPI, CPI will also help federal reserve to make monetary policy. Like lately, due to inflation at its peak, federal reserve have responded to the problem by interest hikes, and we can expect more until it comes down to the central bank goal of 2%
#Best model for durable-goods CPI from the earlier comparison was ETS(A,Ad,N).
cpi_model <- CPI_durable_goods %>%
model(AAdN = ETS(log(Index) ~ error("A") + trend("Ad") + season("N")))
forecast4 <- cpi_model %>% forecast(h = "1 years")
#Append point forecasts to the history to compute growth rates across the boundary.
fc_cpi_durable <- forecast4 %>% as_tsibble(index = Month) %>% mutate(Index = .mean) %>% select(Month,Index)
cpi_durable_fc <- bind_rows(CPI_durable_goods,fc_cpi_durable)
monthly_report_CPI <- cpi_durable_fc %>%
mutate(MoM = (Index - lag(Index)) / lag(Index))
yearly_report_CPI <- cpi_durable_fc %>%
mutate(YoY = (Index - lag(Index, 12)) / lag(Index, 12))
#History in blue, forecast horizon (Apr 2022 onward) in red.
c3<- ggplot(NULL, aes(Month, YoY)) +
geom_col(data = yearly_report_CPI %>% filter_index("2019 Jan" ~ "2022 Mar"), fill ="blue")+
geom_col(data = yearly_report_CPI %>% filter_index("2022 Apr" ~ .), fill = "red")+
scale_y_continuous(labels = scales::percent) + labs(y = "% change") + ggtitle("CPI: Durable goods(%change from previous year)")
c4<- ggplot(NULL, aes(Month, MoM)) +
geom_col(data = monthly_report_CPI %>% filter_index("2020 Jan" ~ "2022 Mar"), fill ="blue")+
geom_col(data = monthly_report_CPI %>% filter_index("2022 Apr" ~ .), fill = "red")+
scale_y_continuous(labels = scales::percent) + labs(y = "% change") + ggtitle("CPI: Durable goods(%change from previous month)")
grid.arrange(c3, c4, nrow = 2)
* Index Forecasts
#Stack the three index forecasts (IP, PPI, CPI for durable goods), each
#over history from 2001 onward. (Fixed typo: "Indusrtial" -> "Industrial".)
f3<- forecast2 %>% autoplot(industrial_prod_durable_goods %>% filter(year(Month)>2000)) +
labs(y = "Index 1982=100", title = "Industrial Production: Durable Goods")
f4<- forecast3 %>% autoplot(PPI_durable_goods%>% filter(year(Month)>2000))+
labs(y = "Index 1982=100", title = "Producer Price Index by Commodity: Durable Goods")
f5<- forecast4 %>% autoplot(CPI_durable_goods %>% filter(year(Month)>2000))+
labs(y = "Index 1982=100", title = "Consumer Price Index for all Urban Consumers: Durable Goods")
grid.arrange(f3,f4,f5,nrow=3)
6.2.NON-DURABLE GOODS
Consumer non-durable goods are purchased for immediate or almost immediate consumption and have a life span ranging from minutes to three years. Common examples of these are food, beverages, clothing, shoes, and gasoline.
6.2.1.Real Personal Consumption Expenditure: Non-Durable Goods
#Best model for non-durable RPCE from the earlier comparison was ETS(M,N,M).
#(Fixed: the model was labelled "AAA" although the specification — and the
#comment — is ETS(M,N,M); the label now matches the specification.)
rpce_model_n <- rpce_ndurable_goods_gdp %>%
model(MNM = ETS(Expenditure ~ error("M") + trend("N") + season("M")))
forecast5 <- rpce_model_n %>% forecast(h = "1 years")
f5<- forecast5 %>% autoplot(rpce_ndurable_goods_gdp) +
labs(y = "Expenditure(%of GDP)", title = "Real Personal Consumption Expenditure:Non-Durable Goods")
#Seasonal plot alongside the forecast.
n1<- rpce_ndurable_goods_gdp %>%
gg_season(Expenditure) + ggtitle("RPCE: Non-Durable Goods")+ylab("% of GDP")
grid.arrange(f5,n1,nrow=2)
6.2.2.Industrial Production/PPI and PPI/CPI: Non-Durable Goods
#Overlay log index levels: industrial production vs PPI (top panel) and
#CPI vs PPI (bottom panel), both for non-durable goods.
colors <- c("PPI:Non-Durable goods" = "blue", "Industrial Production" = "red")
b1<- ggplot(NULL, aes(Month,log(Index))) +
geom_line(data = industrial_prod_ndurable_goods, aes(col = "Industrial Production"))+
geom_line(data = PPI_ndurable_goods, aes(col = "PPI:Non-Durable goods"))+
labs(x = "Month", y = "log(Index)" ,color = "PPI/Industrial Production")+
ggtitle("PPI by commodity(Final Demand) and Industrial production : Non-Durable Goods")+
scale_color_manual(values = colors)
#plotting PPI and CPI
colors <- c("PPI:Non-Durable goods" = "blue", "CPI:NonDurable" = "red")
b2<- ggplot(NULL, aes(Month,log(Index))) +
geom_line(data = CPI_ndurable_goods, aes(col = "CPI:NonDurable"))+
geom_line(data = PPI_ndurable_goods, aes(col = "PPI:Non-Durable goods"))+
labs(x = "Month", y = "log(Index)" ,color = "PPI/CPI:NonDurable")+
ggtitle("PPI by commodity(Final Demand) and CPI :Non- Durable Goods")+
scale_color_manual(values = colors)
grid.arrange(b1,b2,nrow =2)
# Best model for non-durable industrial production in the earlier comparison
# was ETS(M,A,A): multiplicative error, additive trend, additive season.
# FIX: the model column was labelled "MAdM" (which would imply a damped trend
# and multiplicative season) although the spec is error("M") + trend("A") +
# season("A"); renamed to MAA so the label matches the specification.
ip_model1 <- industrial_prod_ndurable_goods %>%
  model(MAA = ETS(log(Index) ~ error("M") + trend("A") + season("A")))
forecast6 <- ip_model1 %>% forecast(h = "1 years")
# Collapse the forecast distribution to its point forecast (.mean) so it can be
# appended to the historical tsibble.
forecast6 <- forecast6 %>% as_tsibble(index = Month) %>% mutate(Index = .mean) %>% select(Month,Index)
ip_nondurable_fc <- bind_rows(industrial_prod_ndurable_goods, forecast6)
# Year-over-year growth: percent change versus the same month one year earlier.
yearly_IP_non <- ip_nondurable_fc %>%
  mutate(YoY = (Index - lag(Index, 12))/ lag(Index, 12)) %>% select(Month, YoY)
# Observed YoY in black; the forecast horizon (2022 Apr onward) in red.
ni1 <- ggplot(NULL, aes(Month, YoY)) +
  geom_line(data = yearly_IP_non %>% filter_index("2015 Jan" ~ "2022 Mar")) +
  geom_line(data = yearly_IP_non %>% filter_index("2022 Apr" ~ .), color = "red") +
  scale_y_continuous(labels = scales::percent) + labs(y = "% change") + ggtitle("Industrial production: Non-Durable goods(%change from previous year)")
# PPI (non-durable): the earlier model comparison favoured an automatically
# selected ARIMA on log(Index), so the same specification is refit here.
ppi_model_non <- PPI_ndurable_goods %>%
  model(arima_model = ARIMA(log(Index), stepwise = FALSE, approx = FALSE))
forecast7 <- ppi_model_non %>% forecast(h = "1 years")
# Reduce the forecast distribution to point forecasts, keeping only Month/Index.
forecast7 <- forecast7 %>%
  as_tsibble(index = Month) %>%
  mutate(Index = .mean) %>%
  select(Month, Index)
# Append the forecast to the observed series, then compute YoY percent change.
ppi_nondurable_fc <- bind_rows(PPI_ndurable_goods, forecast7)
yearly_PPI_non <- ppi_nondurable_fc %>%
  mutate(YoY = (Index - lag(Index, 12)) / lag(Index, 12)) %>%
  select(Month, YoY)
# Observed YoY in black; forecast months (2022 Apr onward) highlighted in red.
ni2 <- ggplot(NULL, aes(Month, YoY)) +
  geom_line(data = yearly_PPI_non %>% filter_index("2015 Jan" ~ "2022 Mar")) +
  geom_line(data = yearly_PPI_non %>% filter_index("2022 Apr" ~ .), color = "red") +
  scale_y_continuous(labels = scales::percent) +
  labs(y = "% change", title = "PPI: Non-Durable goods(%change from previous year)")
# CPI (non-durable): auto-selected ARIMA on log(Index) won the earlier
# model comparison, so it is refit here for the final forecast.
cpi_model1 <- CPI_ndurable_goods %>%
  model(arima_model = ARIMA(log(Index), stepwise = FALSE, approx = FALSE))
forecast8 <- cpi_model1 %>% forecast(h = "1 years")
# Keep only the point forecasts (.mean) as an Index column.
forecast8 <- forecast8 %>%
  as_tsibble(index = Month) %>%
  mutate(Index = .mean) %>%
  select(Month, Index)
# Join history and forecast, then compute year-over-year percent change.
cpi_nondurable_fc <- bind_rows(CPI_ndurable_goods, forecast8)
yearly_CPI_non <- cpi_nondurable_fc %>%
  mutate(YoY = (Index - lag(Index, 12)) / lag(Index, 12)) %>%
  select(Month, YoY)
# Observed YoY in black; forecast months (2022 Apr onward) in red.
ni3 <- ggplot(NULL, aes(Month, YoY)) +
  geom_line(data = yearly_CPI_non %>% filter_index("2015 Jan" ~ "2022 Mar")) +
  geom_line(data = yearly_CPI_non %>% filter_index("2022 Apr" ~ .), color = "red") +
  scale_y_continuous(labels = scales::percent) +
  labs(y = "% change", title = "CPI: Non-Durable goods(%change from previous year)")
# Stack the three non-durable YoY panels (IP, PPI, CPI)
grid.arrange(ni1, ni2, ni3, nrow = 3)
6.3.SERVICES
6.3.1.Real Personal Consumption Expenditure: Services
# RPCE (services): the earlier model comparison selected an automatic ARIMA,
# so refit that specification and forecast one year ahead.
rpce_model_s <- data_rpce_services_gdp %>%
  model(arima_model = ARIMA(Expenditure, stepwise = FALSE, approx = FALSE))
forecast9 <- rpce_model_s %>% forecast(h = "1 years")
# Forecast over the full history of the expenditure share.
f9 <- forecast9 %>%
  autoplot(data_rpce_services_gdp) +
  labs(y = "Expenditure(%of GDP)", title = "Real Personal Consumption Expenditure:Services")
# Seasonal plot of the services expenditure share.
a1 <- data_rpce_services_gdp %>%
  gg_season(Expenditure) +
  labs(title = "RPCE: Services", y = "% of GDP")
grid.arrange(f9, a1, nrow = 2)
The pandemic sparked a remarkable change in consumer spending patterns. Spending on durable consumer goods jumped 103 billion dollars in 2020, while spending on services fell 556 billion during the same period. Surveys show that people substituted bicycles and gym equipment for restaurants, travel, and entertainment. Once people resume spending on services, we may see them buying fewer goods; indeed, the durable goods forecast suggests that spending on durable goods is likely to continue to fall.
6.3.2.Net Exports: Services
# Net exports of services: auto-selected ARIMA was the best model in the
# earlier comparison; refit and forecast one year ahead.
netexp_model <- data_netexp_gdp %>%
  model(arima_model = ARIMA(Expenditure, stepwise = FALSE, approx = FALSE))
forecast10 <- netexp_model %>% forecast(h = "1 years")
# Forecast plotted over the full observed history.
f10 <- forecast10 %>%
  autoplot(data_netexp_gdp) +
  labs(y = "Expenditure(%of GDP)", title = "Net Exports:Services")
# Seasonal view of net exports.
a2 <- data_netexp_gdp %>%
  gg_season(Expenditure) +
  labs(title = "Net Exports: Services")
grid.arrange(f10, a2, nrow = 2)
Besides the Ukraine crisis, US exports remain substantially below the prepandemic level, and imports are now higher than they were in late 2019, as can be inferred from the Net Exports graph. We predict that exports will continue to grow more quickly than imports, which could expand opportunities for investment outside the US and reduce the desire to hold dollars as a hedge against risk, making the United States more competitive globally.
6.3.3.All Employees-Service Providing
# Service-providing employment: auto-selected ARIMA won the model comparison;
# refit and forecast one year ahead.
es_model <- data_ae_sp_1 %>%
  model(arima_model = ARIMA(SRVPRD, stepwise = FALSE, approx = FALSE))
forecast11 <- es_model %>% forecast(h = "1 years")
# Forecast over the full employment history; comma-format the y axis.
f11 <- forecast11 %>%
  autoplot(data_ae_sp_1) +
  labs(y = "Amount", title = "All Employees-Service Providing") +
  scale_y_continuous(labels = comma)
# Seasonal view of service-providing employment.
a3 <- data_ae_sp_1 %>%
  gg_season(SRVPRD) +
  ggtitle("All Employees-Service Providing") +
  scale_y_continuous(labels = comma)
grid.arrange(f11, a3, nrow = 2)
We have seen that, because of the pandemic, employment was 10 million below the prepandemic level; the main question is how to get all of those workers back on the job. There are several reasons to address: generous government benefits have kept some people out of the labour force; health remains a key concern amid COVID, especially for people who have not been vaccinated; child care has prevented a significant number of people from re-entering the labor force; and another major group not back in their jobs is people who were about to retire and decided not to return amid COVID. Most of these workers can be brought back into the workforce through the right compensation packages and flexible working hours and conditions.
CONCLUSION
Given the above historical time series and their associated forecasts, this study shows signs of an economic downturn in the near future. This is driven by several factors. Most notably, in tandem with unprecedented increases in the PPI and CPI over the last two years, we have observed a large movement of capital away from savings and into durable and non-durable goods. In other words, we see that consumers value present-day dollars above future dollars, so they are spending as opposed to saving. More concerning, there is a clear trend (which our forecasts show continuing in the coming years) of net exports decreasing. This further contributes to America’s growing budget deficit, which may ultimately lead to more quantitative easing and a further devaluation of the US Dollar.
In closing, the observed consumer data points to an economic downturn in the short-to-medium run. Any policy aimed at increasing our net exports that is not isolationist in nature (i.e. steep tariffs) could help curb this trend. Further, the government should continue investing in education so that our most valuable asset (high skilled labor) can be preserved and continue to grow in the coming decades.
References